diff --git a/charm/lib/charms/data_platform_libs/v0/data_interfaces.py b/charm/lib/charms/data_platform_libs/v0/data_interfaces.py deleted file mode 100644 index c940cc0..0000000 --- a/charm/lib/charms/data_platform_libs/v0/data_interfaces.py +++ /dev/null @@ -1,2684 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Library to manage the relation for the data-platform products. - -This library contains the Requires and Provides classes for handling the relation -between an application and multiple managed application supported by the data-team: -MySQL, Postgresql, MongoDB, Redis, and Kafka. - -### Database (MySQL, Postgresql, MongoDB, and Redis) - -#### Requires Charm -This library is a uniform interface to a selection of common database -metadata, with added custom events that add convenience to database management, -and methods to consume the application related data. - - -Following an example of using the DatabaseCreatedEvent, in the context of the -application charm code: - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - DatabaseCreatedEvent, - DatabaseRequires, -) - -class ApplicationCharm(CharmBase): - # Application charm that connects to database charms. - - def __init__(self, *args): - super().__init__(*args) - - # Charm events defined in the database requires charm library. - self.database = DatabaseRequires(self, relation_name="database", database_name="database") - self.framework.observe(self.database.on.database_created, self._on_database_created) - - def _on_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - - # Start application with rendered configuration - self._start_application(config_file) - - # Set active status - self.unit.status = ActiveStatus("received database credentials") -``` - -As shown above, the library provides some custom events to handle specific situations, -which are listed below: - -- database_created: event emitted when the requested database is created. -- endpoints_changed: event emitted when the read/write endpoints of the database have changed. -- read_only_endpoints_changed: event emitted when the read-only endpoints of the database - have changed. Event is not triggered if read/write endpoints changed too. - -If it is needed to connect multiple database clusters to the same relation endpoint -the application charm can implement the same code as if it would connect to only -one database cluster (like the above code example). - -To differentiate multiple clusters connected to the same relation endpoint -the application charm can use the name of the remote application: - -```python - -def _on_database_created(self, event: DatabaseCreatedEvent) -> None: - # Get the remote app name of the cluster that triggered this event - cluster = event.relation.app.name -``` - -It is also possible to provide an alias for each different database cluster/relation. - -So, it is possible to differentiate the clusters in two ways. -The first is to use the remote application name, i.e., `event.relation.app.name`, as above. - -The second way is to use different event handlers to handle each cluster events. -The implementation would be something like the following code: - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - DatabaseCreatedEvent, - DatabaseRequires, -) - -class ApplicationCharm(CharmBase): - # Application charm that connects to database charms. - - def __init__(self, *args): - super().__init__(*args) - - # Define the cluster aliases and one handler for each cluster database created event. - self.database = DatabaseRequires( - self, - relation_name="database", - database_name="database", - relations_aliases = ["cluster1", "cluster2"], - ) - self.framework.observe( - self.database.on.cluster1_database_created, self._on_cluster1_database_created - ) - self.framework.observe( - self.database.on.cluster2_database_created, self._on_cluster2_database_created - ) - - def _on_cluster1_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database on the cluster named cluster1 - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - ... - - def _on_cluster2_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database on the cluster named cluster2 - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - ... - -``` - -When it's needed to check whether a plugin (extension) is enabled on the PostgreSQL -charm, you can use the is_postgresql_plugin_enabled method. To use that, you need to -add the following dependency to your charmcraft.yaml file: - -```yaml - -parts: - charm: - charm-binary-python-packages: - - psycopg[binary] - -``` - -### Provider Charm - -Following an example of using the DatabaseRequestedEvent, in the context of the -database charm code: - -```python -from charms.data_platform_libs.v0.data_interfaces import DatabaseProvides - -class SampleCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - # Charm events defined in the database provides charm library. - self.provided_database = DatabaseProvides(self, relation_name="database") - self.framework.observe(self.provided_database.on.database_requested, - self._on_database_requested) - # Database generic helper - self.database = DatabaseHelper() - - def _on_database_requested(self, event: DatabaseRequestedEvent) -> None: - # Handle the event triggered by a new database requested in the relation - # Retrieve the database name using the charm library. - db_name = event.database - # generate a new user credential - username = self.database.generate_user() - password = self.database.generate_password() - # set the credentials for the relation - self.provided_database.set_credentials(event.relation.id, username, password) - # set other variables for the relation event.set_tls("False") -``` -As shown above, the library provides a custom event (database_requested) to handle -the situation when an application charm requests a new database to be created. -It's preferred to subscribe to this event instead of relation changed event to avoid -creating a new database when other information other than a database name is -exchanged in the relation databag. - -### Kafka - -This library is the interface to use and interact with the Kafka charm. This library contains -custom events that add convenience to manage Kafka, and provides methods to consume the -application related data. - -#### Requirer Charm - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - BootstrapServerChangedEvent, - KafkaRequires, - TopicCreatedEvent, -) - -class ApplicationCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - self.kafka = KafkaRequires(self, "kafka_client", "test-topic") - self.framework.observe( - self.kafka.on.bootstrap_server_changed, self._on_kafka_bootstrap_server_changed - ) - self.framework.observe( - self.kafka.on.topic_created, self._on_kafka_topic_created - ) - - def _on_kafka_bootstrap_server_changed(self, event: BootstrapServerChangedEvent): - # Event triggered when a bootstrap server was changed for this application - - new_bootstrap_server = event.bootstrap_server - ... - - def _on_kafka_topic_created(self, event: TopicCreatedEvent): - # Event triggered when a topic was created for this application - username = event.username - password = event.password - tls = event.tls - tls_ca= event.tls_ca - bootstrap_server event.bootstrap_server - consumer_group_prefic = event.consumer_group_prefix - zookeeper_uris = event.zookeeper_uris - ... - -``` - -As shown above, the library provides some custom events to handle specific situations, -which are listed below: - -- topic_created: event emitted when the requested topic is created. -- bootstrap_server_changed: event emitted when the bootstrap server have changed. -- credential_changed: event emitted when the credentials of Kafka changed. - -### Provider Charm - -Following the previous example, this is an example of the provider charm. - -```python -class SampleCharm(CharmBase): - -from charms.data_platform_libs.v0.data_interfaces import ( - KafkaProvides, - TopicRequestedEvent, -) - - def __init__(self, *args): - super().__init__(*args) - - # Default charm events. - self.framework.observe(self.on.start, self._on_start) - - # Charm events defined in the Kafka Provides charm library. - self.kafka_provider = KafkaProvides(self, relation_name="kafka_client") - self.framework.observe(self.kafka_provider.on.topic_requested, self._on_topic_requested) - # Kafka generic helper - self.kafka = KafkaHelper() - - def _on_topic_requested(self, event: TopicRequestedEvent): - # Handle the on_topic_requested event. - - topic = event.topic - relation_id = event.relation.id - # set connection info in the databag relation - self.kafka_provider.set_bootstrap_server(relation_id, self.kafka.get_bootstrap_server()) - self.kafka_provider.set_credentials(relation_id, username=username, password=password) - self.kafka_provider.set_consumer_group_prefix(relation_id, ...) - self.kafka_provider.set_tls(relation_id, "False") - self.kafka_provider.set_zookeeper_uris(relation_id, ...) - -``` -As shown above, the library provides a custom event (topic_requested) to handle -the situation when an application charm requests a new topic to be created. -It is preferred to subscribe to this event instead of relation changed event to avoid -creating a new topic when other information other than a topic name is -exchanged in the relation databag. -""" - -import copy -import json -import logging -from abc import ABC, abstractmethod -from collections import namedtuple -from datetime import datetime -from enum import Enum -from typing import Callable, Dict, List, Optional, Set, Tuple, Union - -from ops import JujuVersion, Secret, SecretInfo, SecretNotFoundError -from ops.charm import ( - CharmBase, - CharmEvents, - RelationChangedEvent, - RelationCreatedEvent, - RelationEvent, - SecretChangedEvent, -) -from ops.framework import EventSource, Object -from ops.model import Application, ModelError, Relation, Unit - -# The unique Charmhub library identifier, never change it -LIBID = "6c3e6b6680d64e9c89e611d1a15f65be" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 27 - -PYDEPS = ["ops>=2.0.0"] - -logger = logging.getLogger(__name__) - -Diff = namedtuple("Diff", "added changed deleted") -Diff.__doc__ = """ -A tuple for storing the diff between two data mappings. - -added - keys that were added -changed - keys that still exist but have new values -deleted - key that were deleted""" - - -PROV_SECRET_PREFIX = "secret-" -REQ_SECRET_FIELDS = "requested-secrets" - - -class SecretGroup(Enum): - """Secret groups as constants.""" - - USER = "user" - TLS = "tls" - EXTRA = "extra" - - -class DataInterfacesError(Exception): - """Common ancestor for DataInterfaces related exceptions.""" - - -class SecretError(Exception): - """Common ancestor for Secrets related exceptions.""" - - -class SecretAlreadyExistsError(SecretError): - """A secret that was to be added already exists.""" - - -class SecretsUnavailableError(SecretError): - """Secrets aren't yet available for Juju version used.""" - - -class SecretsIllegalUpdateError(SecretError): - """Secrets aren't yet available for Juju version used.""" - - -def get_encoded_dict( - relation: Relation, member: Union[Unit, Application], field: str -) -> Optional[Dict[str, str]]: - """Retrieve and decode an encoded field from relation data.""" - data = json.loads(relation.data[member].get(field, "{}")) - if isinstance(data, dict): - return data - logger.error("Unexpected datatype for %s instead of dict.", str(data)) - - -def get_encoded_list( - relation: Relation, member: Union[Unit, Application], field: str -) -> Optional[List[str]]: - """Retrieve and decode an encoded field from relation data.""" - data = json.loads(relation.data[member].get(field, "[]")) - if isinstance(data, list): - return data - logger.error("Unexpected datatype for %s instead of list.", str(data)) - - -def set_encoded_field( - relation: Relation, - member: Union[Unit, Application], - field: str, - value: Union[str, list, Dict[str, str]], -) -> None: - """Set an encoded field from relation data.""" - relation.data[member].update({field: json.dumps(value)}) - - -def diff(event: RelationChangedEvent, bucket: Union[Unit, Application]) -> Diff: - """Retrieves the diff of the data in the relation changed databag. - - Args: - event: relation changed event. - bucket: bucket of the databag (app or unit) - - Returns: - a Diff instance containing the added, deleted and changed - keys from the event relation databag. - """ - # Retrieve the old data from the data key in the application relation databag. - old_data = get_encoded_dict(event.relation, bucket, "data") - - if not old_data: - old_data = {} - - # Retrieve the new data from the event relation databag. - new_data = ( - {key: value for key, value in event.relation.data[event.app].items() if key != "data"} - if event.app - else {} - ) - - # These are the keys that were added to the databag and triggered this event. - added = new_data.keys() - old_data.keys() # pyright: ignore [reportAssignmentType] - # These are the keys that were removed from the databag and triggered this event. - deleted = old_data.keys() - new_data.keys() # pyright: ignore [reportAssignmentType] - # These are the keys that already existed in the databag, - # but had their values changed. - changed = { - key - for key in old_data.keys() & new_data.keys() # pyright: ignore [reportAssignmentType] - if old_data[key] != new_data[key] # pyright: ignore [reportAssignmentType] - } - # Convert the new_data to a serializable format and save it for a next diff check. - set_encoded_field(event.relation, bucket, "data", new_data) - - # Return the diff with all possible changes. - return Diff(added, changed, deleted) - - -def leader_only(f): - """Decorator to ensure that only leader can perform given operation.""" - - def wrapper(self, *args, **kwargs): - if self.component == self.local_app and not self.local_unit.is_leader(): - logger.error( - "This operation (%s()) can only be performed by the leader unit", f.__name__ - ) - return - return f(self, *args, **kwargs) - - return wrapper - - -def juju_secrets_only(f): - """Decorator to ensure that certain operations would be only executed on Juju3.""" - - def wrapper(self, *args, **kwargs): - if not self.secrets_enabled: - raise SecretsUnavailableError("Secrets unavailable on current Juju version") - return f(self, *args, **kwargs) - - return wrapper - - -class Scope(Enum): - """Peer relations scope.""" - - APP = "app" - UNIT = "unit" - - -class CachedSecret: - """Locally cache a secret. - - The data structure is precisely re-using/simulating as in the actual Secret Storage - """ - - def __init__( - self, - charm: CharmBase, - component: Union[Application, Unit], - label: str, - secret_uri: Optional[str] = None, - ): - self._secret_meta = None - self._secret_content = {} - self._secret_uri = secret_uri - self.label = label - self.charm = charm - self.component = component - - def add_secret(self, content: Dict[str, str], relation: Relation) -> Secret: - """Create a new secret.""" - if self._secret_uri: - raise SecretAlreadyExistsError( - "Secret is already defined with uri %s", self._secret_uri - ) - - secret = self.component.add_secret(content, label=self.label) - if relation.app != self.charm.app: - # If it's not a peer relation, grant is to be applied - secret.grant(relation) - self._secret_uri = secret.id - self._secret_meta = secret - return self._secret_meta - - @property - def meta(self) -> Optional[Secret]: - """Getting cached secret meta-information.""" - if not self._secret_meta: - if not (self._secret_uri or self.label): - return - try: - self._secret_meta = self.charm.model.get_secret(label=self.label) - except SecretNotFoundError: - if self._secret_uri: - self._secret_meta = self.charm.model.get_secret( - id=self._secret_uri, label=self.label - ) - return self._secret_meta - - def get_content(self) -> Dict[str, str]: - """Getting cached secret content.""" - if not self._secret_content: - if self.meta: - try: - self._secret_content = self.meta.get_content(refresh=True) - except (ValueError, ModelError) as err: - # https://bugs.launchpad.net/juju/+bug/2042596 - # Only triggered when 'refresh' is set - known_model_errors = [ - "ERROR either URI or label should be used for getting an owned secret but not both", - "ERROR secret owner cannot use --refresh", - ] - if isinstance(err, ModelError) and not any( - msg in str(err) for msg in known_model_errors - ): - raise - # Due to: ValueError: Secret owner cannot use refresh=True - self._secret_content = self.meta.get_content() - return self._secret_content - - def set_content(self, content: Dict[str, str]) -> None: - """Setting cached secret content.""" - if not self.meta: - return - - if content: - self.meta.set_content(content) - self._secret_content = content - else: - self.meta.remove_all_revisions() - - def get_info(self) -> Optional[SecretInfo]: - """Wrapper function to apply the corresponding call on the Secret object within CachedSecret if any.""" - if self.meta: - return self.meta.get_info() - - -class SecretCache: - """A data structure storing CachedSecret objects.""" - - def __init__(self, charm: CharmBase, component: Union[Application, Unit]): - self.charm = charm - self.component = component - self._secrets: Dict[str, CachedSecret] = {} - - def get(self, label: str, uri: Optional[str] = None) -> Optional[CachedSecret]: - """Getting a secret from Juju Secret store or cache.""" - if not self._secrets.get(label): - secret = CachedSecret(self.charm, self.component, label, uri) - if secret.meta: - self._secrets[label] = secret - return self._secrets.get(label) - - def add(self, label: str, content: Dict[str, str], relation: Relation) -> CachedSecret: - """Adding a secret to Juju Secret.""" - if self._secrets.get(label): - raise SecretAlreadyExistsError(f"Secret {label} already exists") - - secret = CachedSecret(self.charm, self.component, label) - secret.add_secret(content, relation) - self._secrets[label] = secret - return self._secrets[label] - - -# Base DataRelation - - -class DataRelation(Object, ABC): - """Base relation data mainpulation (abstract) class.""" - - SCOPE = Scope.APP - - # Local map to associate mappings with secrets potentially as a group - SECRET_LABEL_MAP = { - "username": SecretGroup.USER, - "password": SecretGroup.USER, - "uris": SecretGroup.USER, - "tls": SecretGroup.TLS, - "tls-ca": SecretGroup.TLS, - } - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - super().__init__(charm, relation_name) - self.charm = charm - self.local_app = self.charm.model.app - self.local_unit = self.charm.unit - self.relation_name = relation_name - self.framework.observe( - charm.on[relation_name].relation_changed, - self._on_relation_changed_event, - ) - self._jujuversion = None - self.component = self.local_app if self.SCOPE == Scope.APP else self.local_unit - self.secrets = SecretCache(self.charm, self.component) - - @property - def relations(self) -> List[Relation]: - """The list of Relation instances associated with this relation_name.""" - return [ - relation - for relation in self.charm.model.relations[self.relation_name] - if self._is_relation_active(relation) - ] - - @property - def secrets_enabled(self): - """Is this Juju version allowing for Secrets usage?""" - if not self._jujuversion: - self._jujuversion = JujuVersion.from_environ() - return self._jujuversion.has_secrets - - # Mandatory overrides for internal/helper methods - - @abstractmethod - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation data has changed.""" - raise NotImplementedError - - @abstractmethod - def _get_relation_secret( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - raise NotImplementedError - - @abstractmethod - def _fetch_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation.""" - raise NotImplementedError - - @abstractmethod - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - @abstractmethod - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - @abstractmethod - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - # Internal helper methods - - @staticmethod - def _is_relation_active(relation: Relation): - """Whether the relation is active based on contained data.""" - try: - _ = repr(relation.data) - return True - except (RuntimeError, ModelError): - return False - - @staticmethod - def _is_secret_field(field: str) -> bool: - """Is the field in question a secret reference (URI) field or not?""" - return field.startswith(PROV_SECRET_PREFIX) - - @staticmethod - def _generate_secret_label( - relation_name: str, relation_id: int, group_mapping: SecretGroup - ) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{relation_name}.{relation_id}.{group_mapping.value}.secret" - - def _generate_secret_field_name(self, group_mapping: SecretGroup) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{PROV_SECRET_PREFIX}{group_mapping.value}" - - def _relation_from_secret_label(self, secret_label: str) -> Optional[Relation]: - """Retrieve the relation that belongs to a secret label.""" - contents = secret_label.split(".") - - if not (contents and len(contents) >= 3): - return - - contents.pop() # ".secret" at the end - contents.pop() # Group mapping - relation_id = contents.pop() - try: - relation_id = int(relation_id) - except ValueError: - return - - # In case '.' character appeared in relation name - relation_name = ".".join(contents) - - try: - return self.get_relation(relation_name, relation_id) - except ModelError: - return - - @classmethod - def _group_secret_fields(cls, secret_fields: List[str]) -> Dict[SecretGroup, List[str]]: - """Helper function to arrange secret mappings under their group. - - NOTE: All unrecognized items end up in the 'extra' secret bucket. - Make sure only secret fields are passed! - """ - secret_fieldnames_grouped = {} - for key in secret_fields: - if group := cls.SECRET_LABEL_MAP.get(key): - secret_fieldnames_grouped.setdefault(group, []).append(key) - else: - secret_fieldnames_grouped.setdefault(SecretGroup.EXTRA, []).append(key) - return secret_fieldnames_grouped - - def _get_group_secret_contents( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Optional[Union[Set[str], List[str]]] = None, - ) -> Dict[str, str]: - """Helper function to retrieve collective, requested contents of a secret.""" - if not secret_fields: - secret_fields = [] - - if (secret := self._get_relation_secret(relation.id, group)) and ( - secret_data := secret.get_content() - ): - return {k: v for k, v in secret_data.items() if k in secret_fields} - return {} - - @classmethod - def _content_for_secret_group( - cls, content: Dict[str, str], secret_fields: Set[str], group_mapping: SecretGroup - ) -> Dict[str, str]: - """Select : pairs from input, that belong to this particular Secret group.""" - if group_mapping == SecretGroup.EXTRA: - return { - k: v - for k, v in content.items() - if k in secret_fields and k not in cls.SECRET_LABEL_MAP.keys() - } - - return { - k: v - for k, v in content.items() - if k in secret_fields and cls.SECRET_LABEL_MAP.get(k) == group_mapping - } - - @juju_secrets_only - def _get_relation_secret_data( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[Dict[str, str]]: - """Retrieve contents of a Juju Secret that's been stored in the relation databag.""" - secret = self._get_relation_secret(relation_id, group_mapping, relation_name) - if secret: - return secret.get_content() - - # Core operations on Relation Fields manipulations (regardless whether the field is in the databag or in a secret) - # Internal functions to be called directly from transparent public interface functions (+closely related helpers) - - def _process_secret_fields( - self, - relation: Relation, - req_secret_fields: Optional[List[str]], - impacted_rel_fields: List[str], - operation: Callable, - *args, - **kwargs, - ) -> Tuple[Dict[str, str], Set[str]]: - """Isolate target secret fields of manipulation, and execute requested operation by Secret Group.""" - result = {} - - # If the relation started on a databag, we just stay on the databag - # (Rolling upgrades may result in a relation starting on databag, getting secrets enabled on-the-fly) - # self.local_app is sufficient to check (ignored if Requires, never has secrets -- works if Provides) - fallback_to_databag = ( - req_secret_fields - and self.local_unit.is_leader() - and set(req_secret_fields) & set(relation.data[self.component]) - ) - - normal_fields = set(impacted_rel_fields) - if req_secret_fields and self.secrets_enabled and not fallback_to_databag: - normal_fields = normal_fields - set(req_secret_fields) - secret_fields = set(impacted_rel_fields) - set(normal_fields) - - secret_fieldnames_grouped = self._group_secret_fields(list(secret_fields)) - - for group in secret_fieldnames_grouped: - # operation() should return nothing when all goes well - if group_result := operation(relation, group, secret_fields, *args, **kwargs): - # If "meaningful" data was returned, we take it. (Some 'operation'-s only return success/failure.) - if isinstance(group_result, dict): - result.update(group_result) - else: - # If it wasn't found as a secret, let's give it a 2nd chance as "normal" field - # Needed when Juju3 Requires meets Juju2 Provider - normal_fields |= set(secret_fieldnames_grouped[group]) - return (result, normal_fields) - - def _fetch_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetching databag contents when no secrets are involved. - - Since the Provider's databag is the only one holding secrest, we can apply - a simplified workflow to read the Require's side's databag. - This is used typically when the Provides side wants to read the Requires side's data, - or when the Requires side may want to read its own data. - """ - if component not in relation.data or not relation.data[component]: - return {} - - if fields: - return { - k: relation.data[component][k] for k in fields if k in relation.data[component] - } - else: - return dict(relation.data[component]) - - def _fetch_relation_data_with_secrets( - self, - component: Union[Application, Unit], - req_secret_fields: Optional[List[str]], - relation: Relation, - fields: Optional[List[str]] = None, - ) -> Dict[str, str]: - """Fetching databag contents when secrets may be involved. - - This function has internal logic to resolve if a requested field may be "hidden" - within a Relation Secret, or directly available as a databag field. Typically - used to read the Provides side's databag (eigher by the Requires side, or by - Provides side itself). - """ - result = {} - normal_fields = [] - - if not fields: - if component not in relation.data or not relation.data[component]: - return {} - - all_fields = list(relation.data[component].keys()) - normal_fields = [field for field in all_fields if not self._is_secret_field(field)] - - # There must have been secrets there - if all_fields != normal_fields and req_secret_fields: - # So we assemble the full fields list (without 'secret-' fields) - fields = normal_fields + req_secret_fields - - if fields: - result, normal_fields = self._process_secret_fields( - relation, req_secret_fields, fields, self._get_group_secret_contents - ) - - # Processing "normal" fields. May include leftover from what we couldn't retrieve as a secret. - # (Typically when Juju3 Requires meets Juju2 Provides) - if normal_fields: - result.update( - self._fetch_relation_data_without_secrets(component, relation, list(normal_fields)) - ) - return result - - def _update_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, data: Dict[str, str] - ) -> None: - """Updating databag contents when no secrets are involved.""" - if component not in relation.data or relation.data[component] is None: - return - - if relation: - relation.data[component].update(data) - - def _delete_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, fields: List[str] - ) -> None: - """Remove databag fields 'fields' from Relation.""" - if component not in relation.data or relation.data[component] is None: - return - - for field in fields: - try: - relation.data[component].pop(field) - except KeyError: - logger.error( - "Non-existing field '%s' was attempted to be removed from the databag (relation ID: %s)", - str(field), - str(relation.id), - ) - pass - - # Public interface methods - # Handling Relation Fields seamlessly, regardless if in databag or a Juju Secret - - def get_relation(self, relation_name, relation_id) -> Relation: - """Safe way of retrieving a relation.""" - relation = self.charm.model.get_relation(relation_name, relation_id) - - if not relation: - raise DataInterfacesError( - "Relation %s %s couldn't be retrieved", relation_name, relation_id - ) - - return relation - - def fetch_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Dict[int, Dict[str, str]]: - """Retrieves data from relation. - - This function can be used to retrieve data from a relation - in the charm code when outside an event callback. - Function cannot be used in `*-relation-broken` events and will raise an exception. - - Returns: - a dict of the values stored in the relation data bag - for all relation instances (indexed by the relation ID). - """ - if not relation_name: - relation_name = self.relation_name - - relations = [] - if relation_ids: - relations = [ - self.get_relation(relation_name, relation_id) for relation_id in relation_ids - ] - else: - relations = self.relations - - data = {} - for relation in relations: - if not relation_ids or (relation_ids and relation.id in relation_ids): - data[relation.id] = self._fetch_specific_relation_data(relation, fields) - return data - - def fetch_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """Get a single field from the relation data.""" - return ( - self.fetch_relation_data([relation_id], [field], relation_name) - .get(relation_id, {}) - .get(field) - ) - - def fetch_my_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Optional[Dict[int, Dict[str, str]]]: - """Fetch data of the 'owner' (or 'this app') side of the relation. - - NOTE: Since only the leader can read the relation's 'this_app'-side - Application databag, the functionality is limited to leaders - """ - if not relation_name: - relation_name = self.relation_name - - relations = [] - if relation_ids: - relations = [ - self.get_relation(relation_name, relation_id) for relation_id in relation_ids - ] - else: - relations = self.relations - - data = {} - for relation in relations: - if not relation_ids or relation.id in relation_ids: - data[relation.id] = self._fetch_my_specific_relation_data(relation, fields) - return data - - def fetch_my_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """Get a single field from the relation data -- owner side. - - NOTE: Since only the leader can read the relation's 'this_app'-side - Application databag, the functionality is limited to leaders - """ - if relation_data := self.fetch_my_relation_data([relation_id], [field], relation_name): - return relation_data.get(relation_id, {}).get(field) - - @leader_only - def update_relation_data(self, relation_id: int, data: dict) -> None: - """Update the data within the relation.""" - relation_name = self.relation_name - relation = self.get_relation(relation_name, relation_id) - return self._update_relation_data(relation, data) - - @leader_only - def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: - """Remove field from the relation.""" - relation_name = self.relation_name - relation = self.get_relation(relation_name, relation_id) - return self._delete_relation_data(relation, fields) - - -# Base DataProvides and DataRequires - - -class DataProvides(DataRelation): - """Base provides-side of the data products relation.""" - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - super().__init__(charm, relation_name) - - def _diff(self, event: RelationChangedEvent) -> Diff: - """Retrieves the diff of the data in the relation changed databag. - - Args: - event: relation changed event. - - Returns: - a Diff instance containing the added, deleted and changed - keys from the event relation databag. - """ - return diff(event, self.local_app) - - # Private methods handling secrets - - @juju_secrets_only - def _add_relation_secret( - self, - relation: Relation, - group_mapping: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - uri_to_databag=True, - ) -> bool: - """Add a new Juju Secret that will be registered in the relation databag.""" - secret_field = self._generate_secret_field_name(group_mapping) - if uri_to_databag and relation.data[self.component].get(secret_field): - logging.error("Secret for relation %s already exists, not adding again", relation.id) - return False - - content = self._content_for_secret_group(data, secret_fields, group_mapping) - - label = self._generate_secret_label(self.relation_name, relation.id, group_mapping) - secret = self.secrets.add(label, content, relation) - - # According to lint we may not have a Secret ID - if uri_to_databag and secret.meta and secret.meta.id: - relation.data[self.component][secret_field] = secret.meta.id - - # Return the content that was added - return True - - @juju_secrets_only - def _update_relation_secret( - self, - relation: Relation, - group_mapping: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - ) -> bool: - """Update the contents of an existing Juju Secret, referred in the relation databag.""" - secret = self._get_relation_secret(relation.id, group_mapping) - - if not secret: - logging.error("Can't update secret for relation %s", relation.id) - return False - - content = self._content_for_secret_group(data, secret_fields, group_mapping) - - old_content = secret.get_content() - full_content = copy.deepcopy(old_content) - full_content.update(content) - secret.set_content(full_content) - - # Return True on success - return True - - def _add_or_update_relation_secrets( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - uri_to_databag=True, - ) -> bool: - """Update contents for Secret group. If the Secret doesn't exist, create it.""" - if self._get_relation_secret(relation.id, group): - return self._update_relation_secret(relation, group, secret_fields, data) - else: - return self._add_relation_secret(relation, group, secret_fields, data, uri_to_databag) - - @juju_secrets_only - def _delete_relation_secret( - self, relation: Relation, group: SecretGroup, secret_fields: List[str], fields: List[str] - ) -> bool: - """Update the contents of an existing Juju Secret, referred in the relation databag.""" - secret = self._get_relation_secret(relation.id, group) - - if not secret: - logging.error("Can't delete secret for relation %s", str(relation.id)) - return False - - old_content = secret.get_content() - new_content = copy.deepcopy(old_content) - for field in fields: - try: - new_content.pop(field) - except KeyError: - logging.error( - "Non-existing secret was attempted to be removed %s, %s", - str(relation.id), - str(field), - ) - return False - - secret.set_content(new_content) - - # Remove secret from the relation if it's fully gone - if not new_content: - field = self._generate_secret_field_name(group) - try: - relation.data[self.component].pop(field) - except KeyError: - pass - - # Return the content that was removed - return True - - # Mandatory internal overrides - - @juju_secrets_only - def _get_relation_secret( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - if not relation_name: - relation_name = self.relation_name - - label = self._generate_secret_label(relation_name, relation_id, group_mapping) - if secret := self.secrets.get(label): - return secret - - relation = self.charm.model.get_relation(relation_name, relation_id) - if not relation: - return - - secret_field = self._generate_secret_field_name(group_mapping) - if secret_uri := relation.data[self.local_app].get(secret_field): - return self.secrets.get(label, secret_uri) - - def _fetch_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetching relation data for Provides. - - NOTE: Since all secret fields are in the Provides side of the databag, we don't need to worry about that - """ - if not relation.app: - return {} - - return self._fetch_relation_data_without_secrets(relation.app, relation, fields) - - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> dict: - """Fetching our own relation data.""" - secret_fields = None - if relation.app: - secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - return self._fetch_relation_data_with_secrets( - self.local_app, - secret_fields, - relation, - fields, - ) - - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Set values for fields not caring whether it's a secret or not.""" - req_secret_fields = [] - if relation.app: - req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - _, normal_fields = self._process_secret_fields( - relation, - req_secret_fields, - list(data), - self._add_or_update_relation_secrets, - data=data, - ) - - normal_content = {k: v for k, v in data.items() if k in normal_fields} - self._update_relation_data_without_secrets(self.local_app, relation, normal_content) - - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete fields from the Relation not caring whether it's a secret or not.""" - req_secret_fields = [] - if relation.app: - req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - _, normal_fields = self._process_secret_fields( - relation, req_secret_fields, fields, self._delete_relation_secret, fields=fields - ) - self._delete_relation_data_without_secrets(self.local_app, relation, list(normal_fields)) - - # Public methods - "native" - - def set_credentials(self, relation_id: int, username: str, password: str) -> None: - """Set credentials. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - username: user that was created. - password: password of the created user. - """ - self.update_relation_data(relation_id, {"username": username, "password": password}) - - def set_tls(self, relation_id: int, tls: str) -> None: - """Set whether TLS is enabled. - - Args: - relation_id: the identifier for a particular relation. - tls: whether tls is enabled (True or False). - """ - self.update_relation_data(relation_id, {"tls": tls}) - - def set_tls_ca(self, relation_id: int, tls_ca: str) -> None: - """Set the TLS CA in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - tls_ca: TLS certification authority. - """ - self.update_relation_data(relation_id, {"tls-ca": tls_ca}) - - # Public functions -- inherited - - fetch_my_relation_data = leader_only(DataRelation.fetch_my_relation_data) - fetch_my_relation_field = leader_only(DataRelation.fetch_my_relation_field) - - -class DataRequires(DataRelation): - """Requires-side of the relation.""" - - SECRET_FIELDS = ["username", "password", "tls", "tls-ca", "uris"] - - def __init__( - self, - charm, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of base client relations.""" - super().__init__(charm, relation_name) - self.extra_user_roles = extra_user_roles - self._secret_fields = list(self.SECRET_FIELDS) - if additional_secret_fields: - self._secret_fields += additional_secret_fields - - self.framework.observe( - self.charm.on[relation_name].relation_created, self._on_relation_created_event - ) - self.framework.observe( - charm.on.secret_changed, - self._on_secret_changed_event, - ) - - @property - def secret_fields(self) -> Optional[List[str]]: - """Local access to secrets field, in case they are being used.""" - if self.secrets_enabled: - return self._secret_fields - - def _diff(self, event: RelationChangedEvent) -> Diff: - """Retrieves the diff of the data in the relation changed databag. - - Args: - event: relation changed event. - - Returns: - a Diff instance containing the added, deleted and changed - keys from the event relation databag. - """ - return diff(event, self.local_unit) - - # Internal helper functions - - def _register_secret_to_relation( - self, relation_name: str, relation_id: int, secret_id: str, group: SecretGroup - ): - """Fetch secrets and apply local label on them. - - [MAGIC HERE] - If we fetch a secret using get_secret(id=, label=), - then will be "stuck" on the Secret object, whenever it may - appear (i.e. as an event attribute, or fetched manually) on future occasions. - - This will allow us to uniquely identify the secret on Provides side (typically on - 'secret-changed' events), and map it to the corresponding relation. - """ - label = self._generate_secret_label(relation_name, relation_id, group) - - # Fetchin the Secret's meta information ensuring that it's locally getting registered with - CachedSecret(self.charm, self.component, label, secret_id).meta - - def _register_secrets_to_relation(self, relation: Relation, params_name_list: List[str]): - """Make sure that secrets of the provided list are locally 'registered' from the databag. - - More on 'locally registered' magic is described in _register_secret_to_relation() method - """ - if not relation.app: - return - - for group in SecretGroup: - secret_field = self._generate_secret_field_name(group) - if secret_field in params_name_list: - if secret_uri := relation.data[relation.app].get(secret_field): - self._register_secret_to_relation( - relation.name, relation.id, secret_uri, group - ) - - def _is_resource_created_for_relation(self, relation: Relation) -> bool: - if not relation.app: - return False - - data = self.fetch_relation_data([relation.id], ["username", "password"]).get( - relation.id, {} - ) - return bool(data.get("username")) and bool(data.get("password")) - - def is_resource_created(self, relation_id: Optional[int] = None) -> bool: - """Check if the resource has been created. - - This function can be used to check if the Provider answered with data in the charm code - when outside an event callback. - - Args: - relation_id (int, optional): When provided the check is done only for the relation id - provided, otherwise the check is done for all relations - - Returns: - True or False - - Raises: - IndexError: If relation_id is provided but that relation does not exist - """ - if relation_id is not None: - try: - relation = [relation for relation in self.relations if relation.id == relation_id][ - 0 - ] - return self._is_resource_created_for_relation(relation) - except IndexError: - raise IndexError(f"relation id {relation_id} cannot be accessed") - else: - return ( - all( - self._is_resource_created_for_relation(relation) for relation in self.relations - ) - if self.relations - else False - ) - - # Event handlers - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the relation is created.""" - if not self.local_unit.is_leader(): - return - - if self.secret_fields: - set_encoded_field( - event.relation, self.charm.app, REQ_SECRET_FIELDS, self.secret_fields - ) - - @abstractmethod - def _on_secret_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation data has changed.""" - raise NotImplementedError - - # Mandatory internal overrides - - @juju_secrets_only - def _get_relation_secret( - self, relation_id: int, group: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - if not relation_name: - relation_name = self.relation_name - - label = self._generate_secret_label(relation_name, relation_id, group) - return self.secrets.get(label) - - def _fetch_specific_relation_data( - self, relation, fields: Optional[List[str]] = None - ) -> Dict[str, str]: - """Fetching Requires data -- that may include secrets.""" - if not relation.app: - return {} - return self._fetch_relation_data_with_secrets( - relation.app, self.secret_fields, relation, fields - ) - - def _fetch_my_specific_relation_data(self, relation, fields: Optional[List[str]]) -> dict: - """Fetching our own relation data.""" - return self._fetch_relation_data_without_secrets(self.local_app, relation, fields) - - def _update_relation_data(self, relation: Relation, data: dict) -> None: - """Updates a set of key-value pairs in the relation. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation: the particular relation. - data: dict containing the key-value pairs - that should be updated in the relation. - """ - return self._update_relation_data_without_secrets(self.local_app, relation, data) - - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Deletes a set of fields from the relation. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation: the particular relation. - fields: list containing the field names that should be removed from the relation. - """ - return self._delete_relation_data_without_secrets(self.local_app, relation, fields) - - # Public functions -- inherited - - fetch_my_relation_data = leader_only(DataRelation.fetch_my_relation_data) - fetch_my_relation_field = leader_only(DataRelation.fetch_my_relation_field) - - -# Base DataPeer - - -class DataPeer(DataRequires, DataProvides): - """Represents peer relations.""" - - SECRET_FIELDS = ["operator-password"] - SECRET_FIELD_NAME = "internal_secret" - SECRET_LABEL_MAP = {} - - def __init__( - self, - charm, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - secret_field_name: Optional[str] = None, - deleted_label: Optional[str] = None, - ): - """Manager of base client relations.""" - DataRequires.__init__( - self, charm, relation_name, extra_user_roles, additional_secret_fields - ) - self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME - self.deleted_label = deleted_label - - @property - def scope(self) -> Optional[Scope]: - """Turn component information into Scope.""" - if isinstance(self.component, Application): - return Scope.APP - if isinstance(self.component, Unit): - return Scope.UNIT - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - pass - - def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: - """Event emitted when the secret has changed.""" - pass - - def _generate_secret_label( - self, relation_name: str, relation_id: int, group_mapping: SecretGroup - ) -> str: - members = [self.charm.app.name] - if self.scope: - members.append(self.scope.value) - return f"{'.'.join(members)}" - - def _generate_secret_field_name(self, group_mapping: SecretGroup = SecretGroup.EXTRA) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{self.secret_field_name}" - - @juju_secrets_only - def _get_relation_secret( - self, - relation_id: int, - group_mapping: SecretGroup = SecretGroup.EXTRA, - relation_name: Optional[str] = None, - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret specifically for peer relations. - - In case this code may be executed within a rolling upgrade, and we may need to - migrate secrets from the databag to labels, we make sure to stick the correct - label on the secret, and clean up the local databag. - """ - if not relation_name: - relation_name = self.relation_name - - relation = self.charm.model.get_relation(relation_name, relation_id) - if not relation: - return - - label = self._generate_secret_label(relation_name, relation_id, group_mapping) - secret_uri = relation.data[self.component].get(self._generate_secret_field_name(), None) - - # Fetching the secret with fallback to URI (in case label is not yet known) - # Label would we "stuck" on the secret in case it is found - secret = self.secrets.get(label, secret_uri) - - # Either app scope secret with leader executing, or unit scope secret - leader_or_unit_scope = self.component != self.local_app or self.local_unit.is_leader() - if secret_uri and secret and leader_or_unit_scope: - # Databag reference to the secret URI can be removed, now that it's labelled - relation.data[self.component].pop(self._generate_secret_field_name(), None) - return secret - - def _get_group_secret_contents( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Optional[Union[Set[str], List[str]]] = None, - ) -> Dict[str, str]: - """Helper function to retrieve collective, requested contents of a secret.""" - result = super()._get_group_secret_contents(relation, group, secret_fields) - if not self.deleted_label: - return result - return {key: result[key] for key in result if result[key] != self.deleted_label} - - def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: - """For Rolling Upgrades -- when moving from databag to secrets usage. - - Practically what happens here is to remove stuff from the databag that is - to be stored in secrets. - """ - if not self.secret_fields: - return - - secret_fields_passed = set(self.secret_fields) & set(fields) - for field in secret_fields_passed: - if self._fetch_relation_data_without_secrets(self.component, relation, [field]): - self._delete_relation_data_without_secrets(self.component, relation, [field]) - - def _fetch_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation.""" - return self._fetch_relation_data_with_secrets( - self.component, self.secret_fields, relation, fields - ) - - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - return self._fetch_relation_data_with_secrets( - self.component, self.secret_fields, relation, fields - ) - - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - self._remove_secret_from_databag(relation, list(data.keys())) - _, normal_fields = self._process_secret_fields( - relation, - self.secret_fields, - list(data), - self._add_or_update_relation_secrets, - data=data, - uri_to_databag=False, - ) - - normal_content = {k: v for k, v in data.items() if k in normal_fields} - self._update_relation_data_without_secrets(self.component, relation, normal_content) - - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - if self.secret_fields and self.deleted_label: - current_data = self.fetch_my_relation_data([relation.id], fields) - if current_data is not None: - # Check if the secret we wanna delete actually exists - # Given the "deleted label", here we can't rely on the default mechanism (i.e. 'key not found') - if non_existent := (set(fields) & set(self.secret_fields)) - set( - current_data.get(relation.id, []) - ): - logger.error( - "Non-existing secret %s was attempted to be removed.", - ", ".join(non_existent), - ) - - _, normal_fields = self._process_secret_fields( - relation, - self.secret_fields, - fields, - self._update_relation_secret, - data={field: self.deleted_label for field in fields}, - ) - else: - _, normal_fields = self._process_secret_fields( - relation, self.secret_fields, fields, self._delete_relation_secret, fields=fields - ) - self._delete_relation_data_without_secrets(self.component, relation, list(normal_fields)) - - def fetch_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Dict[int, Dict[str, str]]: - """This method makes no sense for a Peer Relation.""" - raise NotImplementedError( - "Peer Relation only supports 'self-side' fetch methods: " - "fetch_my_relation_data() and fetch_my_relation_field()" - ) - - def fetch_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """This method makes no sense for a Peer Relation.""" - raise NotImplementedError( - "Peer Relation only supports 'self-side' fetch methods: " - "fetch_my_relation_data() and fetch_my_relation_field()" - ) - - # Public functions -- inherited - - fetch_my_relation_data = DataRelation.fetch_my_relation_data - fetch_my_relation_field = DataRelation.fetch_my_relation_field - - -class DataPeerUnit(DataPeer): - """Unit databag representation.""" - - SCOPE = Scope.UNIT - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -# General events - - -class ExtraRoleEvent(RelationEvent): - """Base class for data events.""" - - @property - def extra_user_roles(self) -> Optional[str]: - """Returns the extra user roles that were requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("extra-user-roles") - - -class RelationEventWithSecret(RelationEvent): - """Base class for Relation Events that need to handle secrets.""" - - @property - def _secrets(self) -> dict: - """Caching secrets to avoid fetching them each time a field is referrd. - - DON'T USE the encapsulated helper variable outside of this function - """ - if not hasattr(self, "_cached_secrets"): - self._cached_secrets = {} - return self._cached_secrets - - def _get_secret(self, group) -> Optional[Dict[str, str]]: - """Retrieveing secrets.""" - if not self.app: - return - if not self._secrets.get(group): - self._secrets[group] = None - secret_field = f"{PROV_SECRET_PREFIX}{group}" - if secret_uri := self.relation.data[self.app].get(secret_field): - secret = self.framework.model.get_secret(id=secret_uri) - self._secrets[group] = secret.get_content() - return self._secrets[group] - - @property - def secrets_enabled(self): - """Is this Juju version allowing for Secrets usage?""" - return JujuVersion.from_environ().has_secrets - - -class AuthenticationEvent(RelationEventWithSecret): - """Base class for authentication fields for events. - - The amount of logic added here is not ideal -- but this was the only way to preserve - the interface when moving to Juju Secrets - """ - - @property - def username(self) -> Optional[str]: - """Returns the created username.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("username") - - return self.relation.data[self.relation.app].get("username") - - @property - def password(self) -> Optional[str]: - """Returns the password for the created user.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("password") - - return self.relation.data[self.relation.app].get("password") - - @property - def tls(self) -> Optional[str]: - """Returns whether TLS is configured.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("tls") - if secret: - return secret.get("tls") - - return self.relation.data[self.relation.app].get("tls") - - @property - def tls_ca(self) -> Optional[str]: - """Returns TLS CA.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("tls") - if secret: - return secret.get("tls-ca") - - return self.relation.data[self.relation.app].get("tls-ca") - - -# Database related events and fields - - -class DatabaseProvidesEvent(RelationEvent): - """Base class for database events.""" - - @property - def database(self) -> Optional[str]: - """Returns the database that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("database") - - -class DatabaseRequestedEvent(DatabaseProvidesEvent, ExtraRoleEvent): - """Event emitted when a new database is requested for use on this relation.""" - - -class DatabaseProvidesEvents(CharmEvents): - """Database events. - - This class defines the events that the database can emit. - """ - - database_requested = EventSource(DatabaseRequestedEvent) - - -class DatabaseRequiresEvent(RelationEventWithSecret): - """Base class for database events.""" - - @property - def database(self) -> Optional[str]: - """Returns the database name.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("database") - - @property - def endpoints(self) -> Optional[str]: - """Returns a comma separated list of read/write endpoints. - - In VM charms, this is the primary's address. - In kubernetes charms, this is the service to the primary pod. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("endpoints") - - @property - def read_only_endpoints(self) -> Optional[str]: - """Returns a comma separated list of read only endpoints. - - In VM charms, this is the address of all the secondary instances. - In kubernetes charms, this is the service to all replica pod instances. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("read-only-endpoints") - - @property - def replset(self) -> Optional[str]: - """Returns the replicaset name. - - MongoDB only. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("replset") - - @property - def uris(self) -> Optional[str]: - """Returns the connection URIs. - - MongoDB, Redis, OpenSearch. - """ - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("uris") - - return self.relation.data[self.relation.app].get("uris") - - @property - def version(self) -> Optional[str]: - """Returns the version of the database. - - Version as informed by the database daemon. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("version") - - -class DatabaseCreatedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when a new database is created for use on this relation.""" - - -class DatabaseEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when the read/write endpoints are changed.""" - - -class DatabaseReadOnlyEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when the read only endpoints are changed.""" - - -class DatabaseRequiresEvents(CharmEvents): - """Database events. - - This class defines the events that the database can emit. - """ - - database_created = EventSource(DatabaseCreatedEvent) - endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) - read_only_endpoints_changed = EventSource(DatabaseReadOnlyEndpointsChangedEvent) - - -# Database Provider and Requires - - -class DatabaseProvides(DataProvides): - """Provider-side of the database relations.""" - - on = DatabaseProvidesEvents() # pyright: ignore [reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - super().__init__(charm, relation_name) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.local_unit.is_leader(): - return - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit a database requested event if the setup key (database name and optional - # extra user roles) was added to the relation databag by the application. - if "database" in diff.added: - getattr(self.on, "database_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - def set_database(self, relation_id: int, database_name: str) -> None: - """Set database name. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - database_name: database name. - """ - self.update_relation_data(relation_id, {"database": database_name}) - - def set_endpoints(self, relation_id: int, connection_strings: str) -> None: - """Set database primary connections. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - In VM charms, only the primary's address should be passed as an endpoint. - In kubernetes charms, the service endpoint to the primary pod should be - passed as an endpoint. - - Args: - relation_id: the identifier for a particular relation. - connection_strings: database hosts and ports comma separated list. - """ - self.update_relation_data(relation_id, {"endpoints": connection_strings}) - - def set_read_only_endpoints(self, relation_id: int, connection_strings: str) -> None: - """Set database replicas connection strings. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - connection_strings: database hosts and ports comma separated list. - """ - self.update_relation_data(relation_id, {"read-only-endpoints": connection_strings}) - - def set_replset(self, relation_id: int, replset: str) -> None: - """Set replica set name in the application relation databag. - - MongoDB only. - - Args: - relation_id: the identifier for a particular relation. - replset: replica set name. - """ - self.update_relation_data(relation_id, {"replset": replset}) - - def set_uris(self, relation_id: int, uris: str) -> None: - """Set the database connection URIs in the application relation databag. - - MongoDB, Redis, and OpenSearch only. - - Args: - relation_id: the identifier for a particular relation. - uris: connection URIs. - """ - self.update_relation_data(relation_id, {"uris": uris}) - - def set_version(self, relation_id: int, version: str) -> None: - """Set the database version in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - version: database version. - """ - self.update_relation_data(relation_id, {"version": version}) - - -class DatabaseRequires(DataRequires): - """Requires-side of the database relation.""" - - on = DatabaseRequiresEvents() # pyright: ignore [reportAssignmentType] - - def __init__( - self, - charm, - relation_name: str, - database_name: str, - extra_user_roles: Optional[str] = None, - relations_aliases: Optional[List[str]] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of database client relations.""" - super().__init__(charm, relation_name, extra_user_roles, additional_secret_fields) - self.database = database_name - self.relations_aliases = relations_aliases - - # Define custom event names for each alias. - if relations_aliases: - # Ensure the number of aliases does not exceed the maximum - # of connections allowed in the specific relation. - relation_connection_limit = self.charm.meta.requires[relation_name].limit - if len(relations_aliases) != relation_connection_limit: - raise ValueError( - f"The number of aliases must match the maximum number of connections allowed in the relation. " - f"Expected {relation_connection_limit}, got {len(relations_aliases)}" - ) - - for relation_alias in relations_aliases: - self.on.define_event(f"{relation_alias}_database_created", DatabaseCreatedEvent) - self.on.define_event( - f"{relation_alias}_endpoints_changed", DatabaseEndpointsChangedEvent - ) - self.on.define_event( - f"{relation_alias}_read_only_endpoints_changed", - DatabaseReadOnlyEndpointsChangedEvent, - ) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - pass - - def _assign_relation_alias(self, relation_id: int) -> None: - """Assigns an alias to a relation. - - This function writes in the unit data bag. - - Args: - relation_id: the identifier for a particular relation. - """ - # If no aliases were provided, return immediately. - if not self.relations_aliases: - return - - # Return if an alias was already assigned to this relation - # (like when there are more than one unit joining the relation). - relation = self.charm.model.get_relation(self.relation_name, relation_id) - if relation and relation.data[self.local_unit].get("alias"): - return - - # Retrieve the available aliases (the ones that weren't assigned to any relation). - available_aliases = self.relations_aliases[:] - for relation in self.charm.model.relations[self.relation_name]: - alias = relation.data[self.local_unit].get("alias") - if alias: - logger.debug("Alias %s was already assigned to relation %d", alias, relation.id) - available_aliases.remove(alias) - - # Set the alias in the unit relation databag of the specific relation. - relation = self.charm.model.get_relation(self.relation_name, relation_id) - if relation: - relation.data[self.local_unit].update({"alias": available_aliases[0]}) - - # We need to set relation alias also on the application level so, - # it will be accessible in show-unit juju command, executed for a consumer application unit - if self.local_unit.is_leader(): - self.update_relation_data(relation_id, {"alias": available_aliases[0]}) - - def _emit_aliased_event(self, event: RelationChangedEvent, event_name: str) -> None: - """Emit an aliased event to a particular relation if it has an alias. - - Args: - event: the relation changed event that was received. - event_name: the name of the event to emit. - """ - alias = self._get_relation_alias(event.relation.id) - if alias: - getattr(self.on, f"{alias}_{event_name}").emit( - event.relation, app=event.app, unit=event.unit - ) - - def _get_relation_alias(self, relation_id: int) -> Optional[str]: - """Returns the relation alias. - - Args: - relation_id: the identifier for a particular relation. - - Returns: - the relation alias or None if the relation was not found. - """ - for relation in self.charm.model.relations[self.relation_name]: - if relation.id == relation_id: - return relation.data[self.local_unit].get("alias") - return None - - def is_postgresql_plugin_enabled(self, plugin: str, relation_index: int = 0) -> bool: - """Returns whether a plugin is enabled in the database. - - Args: - plugin: name of the plugin to check. - relation_index: optional relation index to check the database - (default: 0 - first relation). - - PostgreSQL only. - """ - # Psycopg 3 is imported locally to avoid the need of its package installation - # when relating to a database charm other than PostgreSQL. - import psycopg - - # Return False if no relation is established. - if len(self.relations) == 0: - return False - - relation_id = self.relations[relation_index].id - host = self.fetch_relation_field(relation_id, "endpoints") - - # Return False if there is no endpoint available. - if host is None: - return False - - host = host.split(":")[0] - - content = self.fetch_relation_data([relation_id], ["username", "password"]).get( - relation_id, {} - ) - user = content.get("username") - password = content.get("password") - - connection_string = ( - f"host='{host}' dbname='{self.database}' user='{user}' password='{password}'" - ) - try: - with psycopg.connect(connection_string) as connection: - with connection.cursor() as cursor: - cursor.execute( - "SELECT TRUE FROM pg_extension WHERE extname=%s::text;", (plugin,) - ) - return cursor.fetchone() is not None - except psycopg.Error as e: - logger.exception( - f"failed to check whether {plugin} plugin is enabled in the database: %s", str(e) - ) - return False - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the database relation is created.""" - super()._on_relation_created_event(event) - - # If relations aliases were provided, assign one to the relation. - self._assign_relation_alias(event.relation.id) - - # Sets both database and extra user roles in the relation - # if the roles are provided. Otherwise, sets only the database. - if not self.local_unit.is_leader(): - return - - if self.extra_user_roles: - self.update_relation_data( - event.relation.id, - { - "database": self.database, - "extra-user-roles": self.extra_user_roles, - }, - ) - else: - self.update_relation_data(event.relation.id, {"database": self.database}) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the database relation has changed.""" - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self._is_secret_field(newval)): - self._register_secrets_to_relation(event.relation, diff.added) - - # Check if the database is created - # (the database charm shared the credentials). - secret_field_user = self._generate_secret_field_name(SecretGroup.USER) - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("database created at %s", datetime.now()) - getattr(self.on, "database_created").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "database_created") - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “database_created“ is triggered. - return - - # Emit an endpoints changed event if the database - # added or changed this info in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "endpoints_changed") - - # To avoid unnecessary application restarts do not trigger - # “read_only_endpoints_changed“ event if “endpoints_changed“ is triggered. - return - - # Emit a read only endpoints changed event if the database - # added or changed this info in the relation databag. - if "read-only-endpoints" in diff.added or "read-only-endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("read-only-endpoints changed on %s", datetime.now()) - getattr(self.on, "read_only_endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "read_only_endpoints_changed") - - -# Kafka related events - - -class KafkaProvidesEvent(RelationEvent): - """Base class for Kafka events.""" - - @property - def topic(self) -> Optional[str]: - """Returns the topic that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("topic") - - @property - def consumer_group_prefix(self) -> Optional[str]: - """Returns the consumer-group-prefix that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("consumer-group-prefix") - - -class TopicRequestedEvent(KafkaProvidesEvent, ExtraRoleEvent): - """Event emitted when a new topic is requested for use on this relation.""" - - -class KafkaProvidesEvents(CharmEvents): - """Kafka events. - - This class defines the events that the Kafka can emit. - """ - - topic_requested = EventSource(TopicRequestedEvent) - - -class KafkaRequiresEvent(RelationEvent): - """Base class for Kafka events.""" - - @property - def topic(self) -> Optional[str]: - """Returns the topic.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("topic") - - @property - def bootstrap_server(self) -> Optional[str]: - """Returns a comma-separated list of broker uris.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("endpoints") - - @property - def consumer_group_prefix(self) -> Optional[str]: - """Returns the consumer-group-prefix.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("consumer-group-prefix") - - @property - def zookeeper_uris(self) -> Optional[str]: - """Returns a comma separated list of Zookeeper uris.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("zookeeper-uris") - - -class TopicCreatedEvent(AuthenticationEvent, KafkaRequiresEvent): - """Event emitted when a new topic is created for use on this relation.""" - - -class BootstrapServerChangedEvent(AuthenticationEvent, KafkaRequiresEvent): - """Event emitted when the bootstrap server is changed.""" - - -class KafkaRequiresEvents(CharmEvents): - """Kafka events. - - This class defines the events that the Kafka can emit. - """ - - topic_created = EventSource(TopicCreatedEvent) - bootstrap_server_changed = EventSource(BootstrapServerChangedEvent) - - -# Kafka Provides and Requires - - -class KafkaProvides(DataProvides): - """Provider-side of the Kafka relation.""" - - on = KafkaProvidesEvents() # pyright: ignore [reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - super().__init__(charm, relation_name) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.local_unit.is_leader(): - return - - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit a topic requested event if the setup key (topic name and optional - # extra user roles) was added to the relation databag by the application. - if "topic" in diff.added: - getattr(self.on, "topic_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - def set_topic(self, relation_id: int, topic: str) -> None: - """Set topic name in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - topic: the topic name. - """ - self.update_relation_data(relation_id, {"topic": topic}) - - def set_bootstrap_server(self, relation_id: int, bootstrap_server: str) -> None: - """Set the bootstrap server in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - bootstrap_server: the bootstrap server address. - """ - self.update_relation_data(relation_id, {"endpoints": bootstrap_server}) - - def set_consumer_group_prefix(self, relation_id: int, consumer_group_prefix: str) -> None: - """Set the consumer group prefix in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - consumer_group_prefix: the consumer group prefix string. - """ - self.update_relation_data(relation_id, {"consumer-group-prefix": consumer_group_prefix}) - - def set_zookeeper_uris(self, relation_id: int, zookeeper_uris: str) -> None: - """Set the zookeeper uris in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - zookeeper_uris: comma-separated list of ZooKeeper server uris. - """ - self.update_relation_data(relation_id, {"zookeeper-uris": zookeeper_uris}) - - -class KafkaRequires(DataRequires): - """Requires-side of the Kafka relation.""" - - on = KafkaRequiresEvents() # pyright: ignore [reportAssignmentType] - - def __init__( - self, - charm, - relation_name: str, - topic: str, - extra_user_roles: Optional[str] = None, - consumer_group_prefix: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of Kafka client relations.""" - # super().__init__(charm, relation_name) - super().__init__(charm, relation_name, extra_user_roles, additional_secret_fields) - self.charm = charm - self.topic = topic - self.consumer_group_prefix = consumer_group_prefix or "" - - @property - def topic(self): - """Topic to use in Kafka.""" - return self._topic - - @topic.setter - def topic(self, value): - # Avoid wildcards - if value == "*": - raise ValueError(f"Error on topic '{value}', cannot be a wildcard.") - self._topic = value - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the Kafka relation is created.""" - super()._on_relation_created_event(event) - - if not self.local_unit.is_leader(): - return - - # Sets topic, extra user roles, and "consumer-group-prefix" in the relation - relation_data = { - f: getattr(self, f.replace("-", "_"), "") - for f in ["consumer-group-prefix", "extra-user-roles", "topic"] - } - - self.update_relation_data(event.relation.id, relation_data) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - pass - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the Kafka relation has changed.""" - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Check if the topic is created - # (the Kafka charm shared the credentials). - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self._is_secret_field(newval)): - self._register_secrets_to_relation(event.relation, diff.added) - - secret_field_user = self._generate_secret_field_name(SecretGroup.USER) - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("topic created at %s", datetime.now()) - getattr(self.on, "topic_created").emit(event.relation, app=event.app, unit=event.unit) - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “topic_created“ is triggered. - return - - # Emit an endpoints (bootstrap-server) changed event if the Kafka endpoints - # added or changed this info in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "bootstrap_server_changed").emit( - event.relation, app=event.app, unit=event.unit - ) # here check if this is the right design - return - - -# Opensearch related events - - -class OpenSearchProvidesEvent(RelationEvent): - """Base class for OpenSearch events.""" - - @property - def index(self) -> Optional[str]: - """Returns the index that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("index") - - -class IndexRequestedEvent(OpenSearchProvidesEvent, ExtraRoleEvent): - """Event emitted when a new index is requested for use on this relation.""" - - -class OpenSearchProvidesEvents(CharmEvents): - """OpenSearch events. - - This class defines the events that OpenSearch can emit. - """ - - index_requested = EventSource(IndexRequestedEvent) - - -class OpenSearchRequiresEvent(DatabaseRequiresEvent): - """Base class for OpenSearch requirer events.""" - - -class IndexCreatedEvent(AuthenticationEvent, OpenSearchRequiresEvent): - """Event emitted when a new index is created for use on this relation.""" - - -class OpenSearchRequiresEvents(CharmEvents): - """OpenSearch events. - - This class defines the events that the opensearch requirer can emit. - """ - - index_created = EventSource(IndexCreatedEvent) - endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) - authentication_updated = EventSource(AuthenticationEvent) - - -# OpenSearch Provides and Requires Objects - - -class OpenSearchProvides(DataProvides): - """Provider-side of the OpenSearch relation.""" - - on = OpenSearchProvidesEvents() # pyright: ignore[reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - super().__init__(charm, relation_name) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.local_unit.is_leader(): - return - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit an index requested event if the setup key (index name and optional extra user roles) - # have been added to the relation databag by the application. - if "index" in diff.added: - getattr(self.on, "index_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - def set_index(self, relation_id: int, index: str) -> None: - """Set the index in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - index: the index as it is _created_ on the provider charm. This needn't match the - requested index, and can be used to present a different index name if, for example, - the requested index is invalid. - """ - self.update_relation_data(relation_id, {"index": index}) - - def set_endpoints(self, relation_id: int, endpoints: str) -> None: - """Set the endpoints in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - endpoints: the endpoint addresses for opensearch nodes. - """ - self.update_relation_data(relation_id, {"endpoints": endpoints}) - - def set_version(self, relation_id: int, version: str) -> None: - """Set the opensearch version in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - version: database version. - """ - self.update_relation_data(relation_id, {"version": version}) - - -class OpenSearchRequires(DataRequires): - """Requires-side of the OpenSearch relation.""" - - on = OpenSearchRequiresEvents() # pyright: ignore[reportAssignmentType] - - def __init__( - self, - charm, - relation_name: str, - index: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of OpenSearch client relations.""" - super().__init__(charm, relation_name, extra_user_roles, additional_secret_fields) - self.charm = charm - self.index = index - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the OpenSearch relation is created.""" - super()._on_relation_created_event(event) - - if not self.local_unit.is_leader(): - return - - # Sets both index and extra user roles in the relation if the roles are provided. - # Otherwise, sets only the index. - data = {"index": self.index} - if self.extra_user_roles: - data["extra-user-roles"] = self.extra_user_roles - - self.update_relation_data(event.relation.id, data) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - if not event.secret.label: - return - - relation = self._relation_from_secret_label(event.secret.label) - if not relation: - logging.info( - f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" - ) - return - - if relation.app == self.charm.app: - logging.info("Secret changed event ignored for Secret Owner") - - remote_unit = None - for unit in relation.units: - if unit.app != self.charm.app: - remote_unit = unit - - logger.info("authentication updated") - getattr(self.on, "authentication_updated").emit( - relation, app=relation.app, unit=remote_unit - ) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the OpenSearch relation has changed. - - This event triggers individual custom events depending on the changing relation. - """ - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self._is_secret_field(newval)): - self._register_secrets_to_relation(event.relation, diff.added) - - secret_field_user = self._generate_secret_field_name(SecretGroup.USER) - secret_field_tls = self._generate_secret_field_name(SecretGroup.TLS) - updates = {"username", "password", "tls", "tls-ca", secret_field_user, secret_field_tls} - if len(set(diff._asdict().keys()) - updates) < len(diff): - logger.info("authentication updated at: %s", datetime.now()) - getattr(self.on, "authentication_updated").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Check if the index is created - # (the OpenSearch charm shares the credentials). - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("index created at: %s", datetime.now()) - getattr(self.on, "index_created").emit(event.relation, app=event.app, unit=event.unit) - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “index_created“ is triggered. - return - - # Emit a endpoints changed event if the OpenSearch application added or changed this info - # in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) # here check if this is the right design - return diff --git a/charm/lib/charms/grafana_k8s/v0/grafana_dashboard.py b/charm/lib/charms/grafana_k8s/v0/grafana_dashboard.py deleted file mode 100644 index 1f1bc4f..0000000 --- a/charm/lib/charms/grafana_k8s/v0/grafana_dashboard.py +++ /dev/null @@ -1,2013 +0,0 @@ -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -"""## Overview. - -This document explains how to integrate with the Grafana charm -for the purpose of providing a dashboard which can be used by -end users. It also explains the structure of the data -expected by the `grafana-dashboard` interface, and may provide a -mechanism or reference point for providing a compatible interface -or library by providing a definitive reference guide to the -structure of relation data which is shared between the Grafana -charm and any charm providing datasource information. - -## Provider Library Usage - -The Grafana charm interacts with its dashboards using its charm -library. The goal of this library is to be as simple to use as -possible, and instantiation of the class with or without changing -the default arguments provides a complete use case. For the simplest -use case of a charm which bundles dashboards and provides a -`provides: grafana-dashboard` interface, - - requires: - grafana-dashboard: - interface: grafana_dashboard - -creation of a `GrafanaDashboardProvider` object with the default arguments is -sufficient. - -:class:`GrafanaDashboardProvider` expects that bundled dashboards should -be included in your charm with a default path of: - - path/to/charm.py - path/to/src/grafana_dashboards/*.{json|json.tmpl|.tmpl} - -Where the files are Grafana dashboard JSON data either from the -Grafana marketplace, or directly exported from a Grafana instance. -Refer to the [official docs](https://grafana.com/tutorials/provision-dashboards-and-data-sources/) -for more information. - -When constructing a dashboard that is intended to be consumed by COS, make sure to use variables -for your datasources, and name them "prometheusds" and "lokids". You can also use the following -juju topology variables in your dashboards: $juju_model, $juju_model_uuid, $juju_application -and $juju_unit. Note, however, that if metrics are coming via peripheral charms (scrape-config -or cos-config) then topology labels would not exist. - -The default constructor arguments are: - - `charm`: `self` from the charm instantiating this library - `relation_name`: grafana-dashboard - `dashboards_path`: "/src/grafana_dashboards" - -If your configuration requires any changes from these defaults, they -may be set from the class constructor. It may be instantiated as -follows: - - from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider - - class FooCharm: - def __init__(self, *args): - super().__init__(*args, **kwargs) - ... - self.grafana_dashboard_provider = GrafanaDashboardProvider(self) - ... - -The first argument (`self`) should be a reference to the parent (providing -dashboards), as this charm's lifecycle events will be used to re-submit -dashboard information if a charm is upgraded, the pod is restarted, or other. - -An instantiated `GrafanaDashboardProvider` validates that the path specified -in the constructor (or the default) exists, reads the file contents, then -compresses them with LZMA and adds them to the application relation data -when a relation is established with Grafana. - -Provided dashboards will be checked by Grafana, and a series of dropdown menus -providing the ability to select query targets by Juju Model, application instance, -and unit will be added if they do not exist. - -To avoid requiring `jinja` in `GrafanaDashboardProvider` users, template validation -and rendering occurs on the other side of the relation, and relation data in -the form of: - - { - "event": { - "valid": `true|false`, - "errors": [], - } - } - -Will be returned if rendering or validation fails. In this case, the -`GrafanaDashboardProvider` object will emit a `dashboard_status_changed` event -of the type :class:`GrafanaDashboardEvent`, which will contain information -about the validation error. - -This information is added to the relation data for the charms as serialized JSON -from a dict, with a structure of: -``` -{ - "application": { - "dashboards": { - "uuid": a uuid generated to ensure a relation event triggers, - "templates": { - "file:{hash}": { - "content": `{compressed_template_data}`, - "charm": `charm.meta.name`, - "juju_topology": { - "model": `charm.model.name`, - "model_uuid": `charm.model.uuid`, - "application": `charm.app.name`, - "unit": `charm.unit.name`, - } - }, - "file:{other_file_hash}": { - ... - }, - }, - }, - }, -} -``` - -This is ingested by :class:`GrafanaDashboardConsumer`, and is sufficient for configuration. - -The [COS Configuration Charm](https://charmhub.io/cos-configuration-k8s) can be used to -add dashboards which are not bundled with charms. - -## Consumer Library Usage - -The `GrafanaDashboardConsumer` object may be used by Grafana -charms to manage relations with available dashboards. For this -purpose, a charm consuming Grafana dashboard information should do -the following things: - -1. Instantiate the `GrafanaDashboardConsumer` object by providing it a -reference to the parent (Grafana) charm and, optionally, the name of -the relation that the Grafana charm uses to interact with dashboards. -This relation must confirm to the `grafana-dashboard` interface. - -For example a Grafana charm may instantiate the -`GrafanaDashboardConsumer` in its constructor as follows - - from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardConsumer - - def __init__(self, *args): - super().__init__(*args) - ... - self.grafana_dashboard_consumer = GrafanaDashboardConsumer(self) - ... - -2. A Grafana charm also needs to listen to the -`GrafanaDashboardConsumer` events emitted by the `GrafanaDashboardConsumer` -by adding itself as an observer for these events: - - self.framework.observe( - self.grafana_source_consumer.on.sources_changed, - self._on_dashboards_changed, - ) - -Dashboards can be retrieved the :meth:`dashboards`: - -It will be returned in the format of: - -``` -[ - { - "id": unique_id, - "relation_id": relation_id, - "charm": the name of the charm which provided the dashboard, - "content": compressed_template_data - }, -] -``` - -The consuming charm should decompress the dashboard. -""" - -import base64 -import hashlib -import json -import logging -import lzma -import os -import platform -import re -import subprocess -import tempfile -import uuid -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -import yaml -from ops.charm import ( - CharmBase, - HookEvent, - RelationBrokenEvent, - RelationChangedEvent, - RelationCreatedEvent, - RelationEvent, - RelationRole, -) -from ops.framework import ( - EventBase, - EventSource, - Object, - ObjectEvents, - StoredDict, - StoredList, - StoredState, -) -from ops.model import Relation - -# The unique Charmhub library identifier, never change it -LIBID = "c49eb9c7dfef40c7b6235ebd67010a3f" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version - -LIBPATCH = 35 - -logger = logging.getLogger(__name__) - - -DEFAULT_RELATION_NAME = "grafana-dashboard" -DEFAULT_PEER_NAME = "grafana" -RELATION_INTERFACE_NAME = "grafana_dashboard" - -TOPOLOGY_TEMPLATE_DROPDOWNS = [ # type: ignore - { - "allValue": ".*", - "datasource": "${prometheusds}", - "definition": "label_values(up,juju_model)", - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Juju model", - "multi": True, - "name": "juju_model", - "query": { - "query": "label_values(up,juju_model)", - "refId": "StandardVariableQuery", - }, - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": False, - }, - { - "allValue": ".*", - "datasource": "${prometheusds}", - "definition": 'label_values(up{juju_model=~"$juju_model"},juju_model_uuid)', - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Juju model uuid", - "multi": True, - "name": "juju_model_uuid", - "query": { - "query": 'label_values(up{juju_model=~"$juju_model"},juju_model_uuid)', - "refId": "StandardVariableQuery", - }, - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": False, - }, - { - "allValue": ".*", - "datasource": "${prometheusds}", - "definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid"},juju_application)', - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Juju application", - "multi": True, - "name": "juju_application", - "query": { - "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid"},juju_application)', - "refId": "StandardVariableQuery", - }, - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": False, - }, - { - "allValue": ".*", - "datasource": "${prometheusds}", - "definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},juju_unit)', - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Juju unit", - "multi": True, - "name": "juju_unit", - "query": { - "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},juju_unit)', - "refId": "StandardVariableQuery", - }, - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": False, - }, -] - -DATASOURCE_TEMPLATE_DROPDOWNS = [ # type: ignore - { - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Prometheus datasource", - "multi": True, - "name": "prometheusds", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "type": "datasource", - }, - { - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "Loki datasource", - "multi": True, - "name": "lokids", - "options": [], - "query": "loki", - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "type": "datasource", - }, -] - -REACTIVE_CONVERTER = { # type: ignore - "allValue": None, - "datasource": "${prometheusds}", - "definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},host)', - "description": None, - "error": None, - "hide": 0, - "includeAll": True, - "label": "hosts", - "multi": True, - "name": "host", - "options": [], - "query": { - "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},host)', - "refId": "StandardVariableQuery", - }, - "refresh": 1, - "regex": "", - "skipUrlSync": False, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": False, -} - - -class RelationNotFoundError(Exception): - """Raised if there is no relation with the given name.""" - - def __init__(self, relation_name: str): - self.relation_name = relation_name - self.message = "No relation named '{}' found".format(relation_name) - - super().__init__(self.message) - - -class RelationInterfaceMismatchError(Exception): - """Raised if the relation with the given name has a different interface.""" - - def __init__( - self, - relation_name: str, - expected_relation_interface: str, - actual_relation_interface: str, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_interface - self.actual_relation_interface = actual_relation_interface - self.message = ( - "The '{}' relation has '{}' as " - "interface rather than the expected '{}'".format( - relation_name, actual_relation_interface, expected_relation_interface - ) - ) - - super().__init__(self.message) - - -class RelationRoleMismatchError(Exception): - """Raised if the relation with the given name has a different direction.""" - - def __init__( - self, - relation_name: str, - expected_relation_role: RelationRole, - actual_relation_role: RelationRole, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_role - self.actual_relation_role = actual_relation_role - self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( - relation_name, repr(actual_relation_role), repr(expected_relation_role) - ) - - super().__init__(self.message) - - -class InvalidDirectoryPathError(Exception): - """Raised if the grafana dashboards folder cannot be found or is otherwise invalid.""" - - def __init__( - self, - grafana_dashboards_absolute_path: str, - message: str, - ): - self.grafana_dashboards_absolute_path = grafana_dashboards_absolute_path - self.message = message - - super().__init__(self.message) - - -def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: - """Resolve the provided path items against the directory of the main file. - - Look up the directory of the charmed operator file being executed. This is normally - going to be the charm.py file of the charm including this library. Then, resolve - the provided path elements and return its absolute path. - - Raises: - InvalidDirectoryPathError if the resolved path does not exist or it is not a directory - - """ - charm_dir = Path(str(charm.charm_dir)) - if not charm_dir.exists() or not charm_dir.is_dir(): - # Operator Framework does not currently expose a robust - # way to determine the top level charm source directory - # that is consistent across deployed charms and unit tests - # Hence for unit tests the current working directory is used - # TODO: updated this logic when the following ticket is resolved - # https://github.com/canonical/operator/issues/643 - charm_dir = Path(os.getcwd()) - - dir_path = charm_dir.absolute().joinpath(*path_elements) - - if not dir_path.exists(): - raise InvalidDirectoryPathError(str(dir_path), "directory does not exist") - if not dir_path.is_dir(): - raise InvalidDirectoryPathError(str(dir_path), "is not a directory") - - return str(dir_path) - - -def _validate_relation_by_interface_and_direction( - charm: CharmBase, - relation_name: str, - expected_relation_interface: str, - expected_relation_role: RelationRole, -) -> None: - """Verifies that a relation has the necessary characteristics. - - Verifies that the `relation_name` provided: (1) exists in metadata.yaml, - (2) declares as interface the interface name passed as `relation_interface` - and (3) has the right "direction", i.e., it is a relation that `charm` - provides or requires. - - Args: - charm: a `CharmBase` object to scan for the matching relation. - relation_name: the name of the relation to be verified. - expected_relation_interface: the interface name to be matched by the - relation named `relation_name`. - expected_relation_role: whether the `relation_name` must be either - provided or required by `charm`. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - named like the value of the `relation_name` argument. - RelationInterfaceMismatchError: If the relation interface of the - relation named as the provided `relation_name` argument does not - match the `expected_relation_interface` argument. - RelationRoleMismatchError: If the relation named as the provided `relation_name` - argument has a different role than what is specified by the - `expected_relation_role` argument. - """ - if relation_name not in charm.meta.relations: - raise RelationNotFoundError(relation_name) - - relation = charm.meta.relations[relation_name] - - actual_relation_interface = relation.interface_name - if actual_relation_interface and actual_relation_interface != expected_relation_interface: - raise RelationInterfaceMismatchError( - relation_name, expected_relation_interface, actual_relation_interface - ) - - if expected_relation_role == RelationRole.provides: - if relation_name not in charm.meta.provides: - raise RelationRoleMismatchError( - relation_name, RelationRole.provides, RelationRole.requires - ) - elif expected_relation_role == RelationRole.requires: - if relation_name not in charm.meta.requires: - raise RelationRoleMismatchError( - relation_name, RelationRole.requires, RelationRole.provides - ) - else: - raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) - - -def _encode_dashboard_content(content: Union[str, bytes]) -> str: - if isinstance(content, str): - content = bytes(content, "utf-8") - - return base64.b64encode(lzma.compress(content)).decode("utf-8") - - -def _decode_dashboard_content(encoded_content: str) -> str: - return lzma.decompress(base64.b64decode(encoded_content.encode("utf-8"))).decode() - - -def _convert_dashboard_fields(content: str, inject_dropdowns: bool = True) -> str: - """Make sure values are present for Juju topology. - - Inserts Juju topology variables and selectors into the template, as well as - a variable for Prometheus. - """ - dict_content = json.loads(content) - datasources = {} - existing_templates = False - - template_dropdowns = ( - TOPOLOGY_TEMPLATE_DROPDOWNS + DATASOURCE_TEMPLATE_DROPDOWNS # type: ignore - if inject_dropdowns - else DATASOURCE_TEMPLATE_DROPDOWNS - ) - - # If the dashboard has __inputs, get the names to replace them. These are stripped - # from reactive dashboards in GrafanaDashboardAggregator, but charm authors in - # newer charms may import them directly from the marketplace - if "__inputs" in dict_content: - for field in dict_content["__inputs"]: - if "type" in field and field["type"] == "datasource": - datasources[field["name"]] = field["pluginName"].lower() - del dict_content["__inputs"] - - # If no existing template variables exist, just insert our own - if "templating" not in dict_content: - dict_content["templating"] = {"list": list(template_dropdowns)} # type: ignore - else: - # Otherwise, set a flag so we can go back later - existing_templates = True - for template_value in dict_content["templating"]["list"]: - # Build a list of `datasource_name`: `datasource_type` mappings - # The "query" field is actually "prometheus", "loki", "influxdb", etc - if "type" in template_value and template_value["type"] == "datasource": - datasources[template_value["name"]] = template_value["query"].lower() - - # Put our own variables in the template - for d in template_dropdowns: # type: ignore - if d not in dict_content["templating"]["list"]: - dict_content["templating"]["list"].insert(0, d) - - dict_content = _replace_template_fields(dict_content, datasources, existing_templates) - return json.dumps(dict_content) - - -def _replace_template_fields( # noqa: C901 - dict_content: dict, datasources: dict, existing_templates: bool -) -> dict: - """Make templated fields get cleaned up afterwards. - - If existing datasource variables are present, try to substitute them. - """ - replacements = {"loki": "${lokids}", "prometheus": "${prometheusds}"} - used_replacements = [] # type: List[str] - - # If any existing datasources match types we know, or we didn't find - # any templating variables at all, template them. - if datasources or not existing_templates: - panels = dict_content.get("panels", {}) - if panels: - dict_content["panels"] = _template_panels( - panels, replacements, used_replacements, existing_templates, datasources - ) - - # Find panels nested under rows - rows = dict_content.get("rows", {}) - if rows: - for row_idx, row in enumerate(rows): - if "panels" in row.keys(): - rows[row_idx]["panels"] = _template_panels( - row["panels"], - replacements, - used_replacements, - existing_templates, - datasources, - ) - - dict_content["rows"] = rows - - # Finally, go back and pop off the templates we stubbed out - deletions = [] - for tmpl in dict_content["templating"]["list"]: - if tmpl["name"] and tmpl["name"] in used_replacements: - deletions.append(tmpl) - - for d in deletions: - dict_content["templating"]["list"].remove(d) - - return dict_content - - -def _template_panels( - panels: dict, - replacements: dict, - used_replacements: list, - existing_templates: bool, - datasources: dict, -) -> dict: - """Iterate through a `panels` object and template it appropriately.""" - # Go through all the panels. If they have a datasource set, AND it's one - # that we can convert to ${lokids} or ${prometheusds}, by stripping off the - # ${} templating and comparing the name to the list we built, replace it, - # otherwise, leave it alone. - # - for panel in panels: - if "datasource" not in panel or not panel.get("datasource"): - continue - if not existing_templates: - datasource = panel.get("datasource") - if isinstance(datasource, str): - if "loki" in datasource: - panel["datasource"] = "${lokids}" - elif "grafana" in datasource: - continue - else: - panel["datasource"] = "${prometheusds}" - elif isinstance(datasource, dict): - # In dashboards exported by Grafana 9, datasource type is dict - dstype = datasource.get("type", "") - if dstype == "loki": - panel["datasource"]["uid"] = "${lokids}" - elif dstype == "prometheus": - panel["datasource"]["uid"] = "${prometheusds}" - else: - logger.debug("Unrecognized datasource type '%s'; skipping", dstype) - continue - else: - logger.error("Unknown datasource format: skipping") - continue - else: - if isinstance(panel["datasource"], str): - if panel["datasource"].lower() in replacements.values(): - # Already a known template variable - continue - # Strip out variable characters and maybe braces - ds = re.sub(r"(\$|\{|\})", "", panel["datasource"]) - - if ds not in datasources.keys(): - # Unknown, non-templated datasource, potentially a Grafana builtin - continue - - replacement = replacements.get(datasources[ds], "") - if replacement: - used_replacements.append(ds) - panel["datasource"] = replacement or panel["datasource"] - elif isinstance(panel["datasource"], dict): - dstype = panel["datasource"].get("type", "") - if panel["datasource"].get("uid", "").lower() in replacements.values(): - # Already a known template variable - continue - # Strip out variable characters and maybe braces - ds = re.sub(r"(\$|\{|\})", "", panel["datasource"].get("uid", "")) - - if ds not in datasources.keys(): - # Unknown, non-templated datasource, potentially a Grafana builtin - continue - - replacement = replacements.get(datasources[ds], "") - if replacement: - used_replacements.append(ds) - panel["datasource"]["uid"] = replacement - else: - logger.error("Unknown datasource format: skipping") - continue - return panels - - -def _inject_labels(content: str, topology: dict, transformer: "CosTool") -> str: - """Inject Juju topology into panel expressions via CosTool. - - A dashboard will have a structure approximating: - { - "__inputs": [], - "templating": { - "list": [ - { - "name": "prometheusds", - "type": "prometheus" - } - ] - }, - "panels": [ - { - "foo": "bar", - "targets": [ - { - "some": "field", - "expr": "up{job="foo"}" - }, - { - "some_other": "field", - "expr": "sum(http_requests_total{instance="$foo"}[5m])} - } - ], - "datasource": "${someds}" - } - ] - } - - `templating` is used elsewhere in this library, but the structure is not rigid. It is - not guaranteed that a panel will actually have any targets (it could be a "spacer" with - no datasource, hence no expression). It could have only one target. It could have multiple - targets. It could have multiple targets of which only one has an `expr` to evaluate. We need - to try to handle all of these concisely. - - `cos-tool` (`github.com/canonical/cos-tool` as a Go module in general) - does not know "Grafana-isms", such as using `[$_variable]` to modify the query from the user - interface, so we add placeholders (as `5y`, since it must parse, but a dashboard looking for - five years for a panel query would be unusual). - - Args: - content: dashboard content as a string - topology: a dict containing topology values - transformer: a 'CosTool' instance - Returns: - dashboard content with replaced values. - """ - dict_content = json.loads(content) - - if "panels" not in dict_content.keys(): - return json.dumps(dict_content) - - # Go through all the panels and inject topology labels - # Panels may have more than one 'target' where the expressions live, so that must be - # accounted for. Additionally, `promql-transform` does not necessarily gracefully handle - # expressions with range queries including variables. Exclude these. - # - # It is not a certainty that the `datasource` field will necessarily reflect the type, so - # operate on all fields. - panels = dict_content["panels"] - topology_with_prefix = {"juju_{}".format(k): v for k, v in topology.items()} - - # We need to use an index so we can insert the changed element back later - for panel_idx, panel in enumerate(panels): - if not isinstance(panel, dict): - continue - - # Use the index to insert it back in the same location - panels[panel_idx] = _modify_panel(panel, topology_with_prefix, transformer) - - return json.dumps(dict_content) - - -def _modify_panel(panel: dict, topology: dict, transformer: "CosTool") -> dict: - """Inject Juju topology into panel expressions via CosTool. - - Args: - panel: a dashboard panel as a dict - topology: a dict containing topology values - transformer: a 'CosTool' instance - Returns: - the panel with injected values - """ - if "targets" not in panel.keys(): - return panel - - # Pre-compile a regular expression to grab values from inside of [] - range_re = re.compile(r"\[(?P.*?)\]") - # Do the same for any offsets - offset_re = re.compile(r"offset\s+(?P-?\s*[$\w]+)") - - known_datasources = {"${prometheusds}": "promql", "${lokids}": "logql"} - - targets = panel["targets"] - - # We need to use an index so we can insert the changed element back later - for idx, target in enumerate(targets): - # If there's no expression, we don't need to do anything - if "expr" not in target.keys(): - continue - expr = target["expr"] - - if "datasource" not in panel.keys(): - continue - - if isinstance(panel["datasource"], str): - if panel["datasource"] not in known_datasources: - continue - querytype = known_datasources[panel["datasource"]] - elif isinstance(panel["datasource"], dict): - if panel["datasource"]["uid"] not in known_datasources: - continue - querytype = known_datasources[panel["datasource"]["uid"]] - else: - logger.error("Unknown datasource format: skipping") - continue - - # Capture all values inside `[]` into a list which we'll iterate over later to - # put them back in-order. Then apply the regex again and replace everything with - # `[5y]` so promql/parser will take it. - # - # Then do it again for offsets - range_values = [m.group("value") for m in range_re.finditer(expr)] - expr = range_re.sub(r"[5y]", expr) - - offset_values = [m.group("value") for m in offset_re.finditer(expr)] - expr = offset_re.sub(r"offset 5y", expr) - # Retrieve the new expression (which may be unchanged if there were no label - # matchers in the expression, or if tt was unable to be parsed like logql. It's - # virtually impossible to tell from any datasource "name" in a panel what the - # actual type is without re-implementing a complete dashboard parser, but no - # harm will some from passing invalid promql -- we'll just get the original back. - # - replacement = transformer.inject_label_matchers(expr, topology, querytype) - - if replacement == target["expr"]: - # promql-tranform caught an error. Move on - continue - - # Go back and substitute values in [] which were pulled out - # Enumerate with an index... again. The same regex is ok, since it will still match - # `[(.*?)]`, which includes `[5y]`, our placeholder - for i, match in enumerate(range_re.finditer(replacement)): - # Replace one-by-one, starting from the left. We build the string back with - # `str.replace(string_to_replace, replacement_value, count)`. Limit the count - # to one, since we are going through one-by-one through the list we saved earlier - # in `range_values`. - replacement = replacement.replace( - "[{}]".format(match.group("value")), - "[{}]".format(range_values[i]), - 1, - ) - - for i, match in enumerate(offset_re.finditer(replacement)): - # Replace one-by-one, starting from the left. We build the string back with - # `str.replace(string_to_replace, replacement_value, count)`. Limit the count - # to one, since we are going through one-by-one through the list we saved earlier - # in `range_values`. - replacement = replacement.replace( - "offset {}".format(match.group("value")), - "offset {}".format(offset_values[i]), - 1, - ) - - # Use the index to insert it back in the same location - targets[idx]["expr"] = replacement - - panel["targets"] = targets - return panel - - -def _type_convert_stored(obj): - """Convert Stored* to their appropriate types, recursively.""" - if isinstance(obj, StoredList): - return list(map(_type_convert_stored, obj)) - if isinstance(obj, StoredDict): - rdict = {} # type: Dict[Any, Any] - for k in obj.keys(): - rdict[k] = _type_convert_stored(obj[k]) - return rdict - return obj - - -class GrafanaDashboardsChanged(EventBase): - """Event emitted when Grafana dashboards change.""" - - def __init__(self, handle, data=None): - super().__init__(handle) - self.data = data - - def snapshot(self) -> Dict: - """Save grafana source information.""" - return {"data": self.data} - - def restore(self, snapshot): - """Restore grafana source information.""" - self.data = snapshot["data"] - - -class GrafanaDashboardEvents(ObjectEvents): - """Events raised by :class:`GrafanaSourceEvents`.""" - - dashboards_changed = EventSource(GrafanaDashboardsChanged) - - -class GrafanaDashboardEvent(EventBase): - """Event emitted when Grafana dashboards cannot be resolved. - - Enables us to set a clear status on the provider. - """ - - def __init__(self, handle, errors: List[Dict[str, str]] = [], valid: bool = False): - super().__init__(handle) - self.errors = errors - self.error_message = "; ".join([error["error"] for error in errors if "error" in error]) - self.valid = valid - - def snapshot(self) -> Dict: - """Save grafana source information.""" - return { - "error_message": self.error_message, - "valid": self.valid, - "errors": json.dumps(self.errors), - } - - def restore(self, snapshot): - """Restore grafana source information.""" - self.error_message = snapshot["error_message"] - self.valid = snapshot["valid"] - self.errors = json.loads(str(snapshot["errors"])) - - -class GrafanaProviderEvents(ObjectEvents): - """Events raised by :class:`GrafanaSourceEvents`.""" - - dashboard_status_changed = EventSource(GrafanaDashboardEvent) - - -class GrafanaDashboardProvider(Object): - """An API to provide Grafana dashboards to a Grafana charm.""" - - _stored = StoredState() - on = GrafanaProviderEvents() # pyright: ignore - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - dashboards_path: str = "src/grafana_dashboards", - ) -> None: - """API to provide Grafana dashboard to a Grafana charmed operator. - - The :class:`GrafanaDashboardProvider` object provides an API - to upload dashboards to a Grafana charm. In its most streamlined - usage, the :class:`GrafanaDashboardProvider` is integrated in a - charmed operator as follows: - - self.grafana = GrafanaDashboardProvider(self) - - The :class:`GrafanaDashboardProvider` will look for dashboard - templates in the `/grafana_dashboards` folder. - Additionally, dashboard templates can be uploaded programmatically - via the :method:`GrafanaDashboardProvider.add_dashboard` method. - - To use the :class:`GrafanaDashboardProvider` API, you need a relation - defined in your charm operator's metadata.yaml as follows: - - provides: - grafana-dashboard: - interface: grafana_dashboard - - If you would like to use relation name other than `grafana-dashboard`, - you will need to specify the relation name via the `relation_name` - argument when instantiating the :class:`GrafanaDashboardProvider` object. - However, it is strongly advised to keep the default relation name, - so that people deploying your charm will have a consistent experience - with all other charms that provide Grafana dashboards. - - It is possible to provide a different file path for the Grafana dashboards - to be automatically managed by the :class:`GrafanaDashboardProvider` object - via the `dashboards_path` argument. This may be necessary when the directory - structure of your charmed operator repository is not the "usual" one as - generated by `charmcraft init`, for example when adding the charmed operator - in a Java repository managed by Maven or Gradle. However, unless there are - such constraints with other tooling, it is strongly advised to store the - Grafana dashboards in the default `/grafana_dashboards` - folder, in order to provide a consistent experience for other charmed operator - authors. - - Args: - charm: a :class:`CharmBase` object which manages this - :class:`GrafanaProvider` object. Generally this is - `self` in the instantiating class. - relation_name: a :string: name of the relation managed by this - :class:`GrafanaDashboardProvider`; it defaults to "grafana-dashboard". - dashboards_path: a filesystem path relative to the charm root - where dashboard templates can be located. By default, the library - expects dashboard files to be in the `/grafana_dashboards` - directory. - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides - ) - - try: - dashboards_path = _resolve_dir_against_charm_path(charm, dashboards_path) - except InvalidDirectoryPathError as e: - logger.warning( - "Invalid Grafana dashboards folder at %s: %s", - e.grafana_dashboards_absolute_path, - e.message, - ) - - super().__init__(charm, relation_name) - - self._charm = charm - self._relation_name = relation_name - self._dashboards_path = dashboards_path - - # No peer relation bucket we can rely on providers, keep StoredState here, too - self._stored.set_default(dashboard_templates={}) # type: ignore - - self.framework.observe(self._charm.on.leader_elected, self._update_all_dashboards_from_dir) - self.framework.observe(self._charm.on.upgrade_charm, self._update_all_dashboards_from_dir) - - self.framework.observe( - self._charm.on[self._relation_name].relation_created, - self._on_grafana_dashboard_relation_created, - ) - self.framework.observe( - self._charm.on[self._relation_name].relation_changed, - self._on_grafana_dashboard_relation_changed, - ) - - def add_dashboard(self, content: str, inject_dropdowns: bool = True) -> None: - """Add a dashboard to the relation managed by this :class:`GrafanaDashboardProvider`. - - Args: - content: a string representing a Jinja template. Currently, no - global variables are added to the Jinja template evaluation - context. - inject_dropdowns: a :boolean: indicating whether topology dropdowns should be - added to the dashboard - """ - # Update of storage must be done irrespective of leadership, so - # that the stored state is there when this unit becomes leader. - stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore - - encoded_dashboard = _encode_dashboard_content(content) - - # Use as id the first chars of the encoded dashboard, so that - # it is predictable across units. - id = "prog:{}".format(encoded_dashboard[-24:-16]) - - stored_dashboard_templates[id] = self._content_to_dashboard_object( - encoded_dashboard, inject_dropdowns - ) - stored_dashboard_templates[id]["dashboard_alt_uid"] = self._generate_alt_uid(id) - - if self._charm.unit.is_leader(): - for dashboard_relation in self._charm.model.relations[self._relation_name]: - self._upset_dashboards_on_relation(dashboard_relation) - - def remove_non_builtin_dashboards(self) -> None: - """Remove all dashboards to the relation added via :method:`add_dashboard`.""" - # Update of storage must be done irrespective of leadership, so - # that the stored state is there when this unit becomes leader. - stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore - - for dashboard_id in list(stored_dashboard_templates.keys()): - if dashboard_id.startswith("prog:"): - del stored_dashboard_templates[dashboard_id] - self._stored.dashboard_templates = stored_dashboard_templates - - if self._charm.unit.is_leader(): - for dashboard_relation in self._charm.model.relations[self._relation_name]: - self._upset_dashboards_on_relation(dashboard_relation) - - def update_dashboards(self) -> None: - """Trigger the re-evaluation of the data on all relations.""" - if self._charm.unit.is_leader(): - for dashboard_relation in self._charm.model.relations[self._relation_name]: - self._upset_dashboards_on_relation(dashboard_relation) - - def _update_all_dashboards_from_dir( - self, _: Optional[HookEvent] = None, inject_dropdowns: bool = True - ) -> None: - """Scans the built-in dashboards and updates relations with changes.""" - # Update of storage must be done irrespective of leadership, so - # that the stored state is there when this unit becomes leader. - - # Ensure we do not leave outdated dashboards by removing from stored all - # the encoded dashboards that start with "file/". - if self._dashboards_path: - stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore - - for dashboard_id in list(stored_dashboard_templates.keys()): - if dashboard_id.startswith("file:"): - del stored_dashboard_templates[dashboard_id] - - # Path.glob uses fnmatch on the backend, which is pretty limited, so use a - # custom function for the filter - def _is_dashboard(p: Path) -> bool: - return p.is_file() and p.name.endswith((".json", ".json.tmpl", ".tmpl")) - - for path in filter(_is_dashboard, Path(self._dashboards_path).glob("*")): - # path = Path(path) - id = "file:{}".format(path.stem) - stored_dashboard_templates[id] = self._content_to_dashboard_object( - _encode_dashboard_content(path.read_bytes()), inject_dropdowns - ) - stored_dashboard_templates[id]["dashboard_alt_uid"] = self._generate_alt_uid(id) - - self._stored.dashboard_templates = stored_dashboard_templates - - if self._charm.unit.is_leader(): - for dashboard_relation in self._charm.model.relations[self._relation_name]: - self._upset_dashboards_on_relation(dashboard_relation) - - def _generate_alt_uid(self, key: str) -> str: - """Generate alternative uid for dashboards. - - Args: - key: A string used (along with charm.meta.name) to build the hash uid. - - Returns: A hash string. - """ - raw_dashboard_alt_uid = "{}-{}".format(self._charm.meta.name, key) - return hashlib.shake_256(raw_dashboard_alt_uid.encode("utf-8")).hexdigest(8) - - def _reinitialize_dashboard_data(self, inject_dropdowns: bool = True) -> None: - """Triggers a reload of dashboard outside of an eventing workflow. - - Args: - inject_dropdowns: a :bool: used to indicate whether topology dropdowns should be added - - This will destroy any existing relation data. - """ - try: - _resolve_dir_against_charm_path(self._charm, self._dashboards_path) - self._update_all_dashboards_from_dir(inject_dropdowns=inject_dropdowns) - - except InvalidDirectoryPathError as e: - logger.warning( - "Invalid Grafana dashboards folder at %s: %s", - e.grafana_dashboards_absolute_path, - e.message, - ) - stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore - - for dashboard_id in list(stored_dashboard_templates.keys()): - if dashboard_id.startswith("file:"): - del stored_dashboard_templates[dashboard_id] - self._stored.dashboard_templates = stored_dashboard_templates - - # With all the file-based dashboards cleared out, force a refresh - # of relation data - if self._charm.unit.is_leader(): - for dashboard_relation in self._charm.model.relations[self._relation_name]: - self._upset_dashboards_on_relation(dashboard_relation) - - def _on_grafana_dashboard_relation_created(self, event: RelationCreatedEvent) -> None: - """Watch for a relation being created and automatically send dashboards. - - Args: - event: The :class:`RelationJoinedEvent` sent when a - `grafana_dashboaard` relationship is joined - """ - if self._charm.unit.is_leader(): - self._update_all_dashboards_from_dir() - self._upset_dashboards_on_relation(event.relation) - - def _on_grafana_dashboard_relation_changed(self, event: RelationChangedEvent) -> None: - """Watch for changes so we know if there's an error to signal back to the parent charm. - - Args: - event: The `RelationChangedEvent` that triggered this handler. - """ - if self._charm.unit.is_leader(): - data = json.loads(event.relation.data[event.app].get("event", "{}")) # type: ignore - - if not data: - return - - valid = bool(data.get("valid", True)) - errors = data.get("errors", []) - if valid and not errors: - self.on.dashboard_status_changed.emit(valid=valid) # pyright: ignore - else: - self.on.dashboard_status_changed.emit( # pyright: ignore - valid=valid, errors=errors - ) - - def _upset_dashboards_on_relation(self, relation: Relation) -> None: - """Update the dashboards in the relation data bucket.""" - # It's completely ridiculous to add a UUID, but if we don't have some - # pseudo-random value, this never makes it across 'juju set-state' - stored_data = { - "templates": _type_convert_stored(self._stored.dashboard_templates), # pyright: ignore - "uuid": str(uuid.uuid4()), - } - - relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) - - def _content_to_dashboard_object(self, content: str, inject_dropdowns: bool = True) -> Dict: - return { - "charm": self._charm.meta.name, - "content": content, - "juju_topology": self._juju_topology if inject_dropdowns else {}, - "inject_dropdowns": inject_dropdowns, - } - - # This is not actually used in the dashboards, but is present to provide a secondary - # salt to ensure uniqueness in the dict keys in case individual charm units provide - # dashboards - @property - def _juju_topology(self) -> Dict: - return { - "model": self._charm.model.name, - "model_uuid": self._charm.model.uuid, - "application": self._charm.app.name, - "unit": self._charm.unit.name, - } - - @property - def dashboard_templates(self) -> List: - """Return a list of the known dashboard templates.""" - return list(self._stored.dashboard_templates.values()) # type: ignore - - -class GrafanaDashboardConsumer(Object): - """A consumer object for working with Grafana Dashboards.""" - - on = GrafanaDashboardEvents() # pyright: ignore - _stored = StoredState() - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - ) -> None: - """API to receive Grafana dashboards from charmed operators. - - The :class:`GrafanaDashboardConsumer` object provides an API - to consume dashboards provided by a charmed operator using the - :class:`GrafanaDashboardProvider` library. The - :class:`GrafanaDashboardConsumer` is integrated in a - charmed operator as follows: - - self.grafana = GrafanaDashboardConsumer(self) - - To use this library, you need a relation defined as follows in - your charm operator's metadata.yaml: - - requires: - grafana-dashboard: - interface: grafana_dashboard - - If you would like to use a different relation name than - `grafana-dashboard`, you need to specify the relation name via the - `relation_name` argument. However, it is strongly advised not to - change the default, so that people deploying your charm will have - a consistent experience with all other charms that consume Grafana - dashboards. - - Args: - charm: a :class:`CharmBase` object which manages this - :class:`GrafanaProvider` object. Generally this is - `self` in the instantiating class. - relation_name: a :string: name of the relation managed by this - :class:`GrafanaDashboardConsumer`; it defaults to "grafana-dashboard". - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires - ) - - super().__init__(charm, relation_name) - self._charm = charm - self._relation_name = relation_name - self._tranformer = CosTool(self._charm) - - self._stored.set_default(dashboards={}) # type: ignore - - self.framework.observe( - self._charm.on[self._relation_name].relation_changed, - self._on_grafana_dashboard_relation_changed, - ) - self.framework.observe( - self._charm.on[self._relation_name].relation_broken, - self._on_grafana_dashboard_relation_broken, - ) - self.framework.observe( - self._charm.on[DEFAULT_PEER_NAME].relation_changed, - self._on_grafana_peer_changed, - ) - - def get_dashboards_from_relation(self, relation_id: int) -> List: - """Get a list of known dashboards for one instance of the monitored relation. - - Args: - relation_id: the identifier of the relation instance, as returned by - :method:`ops.model.Relation.id`. - - Returns: a list of known dashboards coming from the provided relation instance. - """ - return [ - self._to_external_object(relation_id, dashboard) - for dashboard in self._get_stored_dashboards(relation_id) - ] - - def _on_grafana_dashboard_relation_changed(self, event: RelationChangedEvent) -> None: - """Handle relation changes in related providers. - - If there are changes in relations between Grafana dashboard consumers - and providers, this event handler (if the unit is the leader) will - get data for an incoming grafana-dashboard relation through a - :class:`GrafanaDashboardsChanged` event, and make the relation data - available in the app's datastore object. The Grafana charm can - then respond to the event to update its configuration. - """ - changes = False - if self._charm.unit.is_leader(): - changes = self._render_dashboards_and_signal_changed(event.relation) - - if changes: - self.on.dashboards_changed.emit() # pyright: ignore - - def _on_grafana_peer_changed(self, _: RelationChangedEvent) -> None: - """Emit dashboard events on peer events so secondary charm data updates.""" - if self._charm.unit.is_leader(): - return - self.on.dashboards_changed.emit() # pyright: ignore - - def update_dashboards(self, relation: Optional[Relation] = None) -> None: - """Re-establish dashboards on one or more relations. - - If something changes between this library and a datasource, try to re-establish - invalid dashboards and invalidate active ones. - - Args: - relation: a specific relation for which the dashboards have to be - updated. If not specified, all relations managed by this - :class:`GrafanaDashboardConsumer` will be updated. - """ - if self._charm.unit.is_leader(): - relations = ( - [relation] if relation else self._charm.model.relations[self._relation_name] - ) - - for relation in relations: - self._render_dashboards_and_signal_changed(relation) - - def _on_grafana_dashboard_relation_broken(self, event: RelationBrokenEvent) -> None: - """Update job config when providers depart. - - When a Grafana dashboard provider departs, the configuration - for that provider is removed from the list of dashboards - """ - if not self._charm.unit.is_leader(): - return - - self._remove_all_dashboards_for_relation(event.relation) - - def _render_dashboards_and_signal_changed(self, relation: Relation) -> bool: # type: ignore - """Validate a given dashboard. - - Verify that the passed dashboard data is able to be found in our list - of datasources and will render. If they do, let the charm know by - emitting an event. - - Args: - relation: Relation; The relation the dashboard is associated with. - - Returns: - a boolean indicating whether an event should be emitted - """ - other_app = relation.app - - raw_data = relation.data[other_app].get("dashboards", "") # pyright: ignore - - if not raw_data: - logger.warning( - "No dashboard data found in the %s:%s relation", - self._relation_name, - str(relation.id), - ) - return False - - data = json.loads(raw_data) - - # The only piece of data needed on this side of the relations is "templates" - templates = data.pop("templates") - - # The dashboards are WAY too big since this ultimately calls out to Juju to - # set the relation data, and it overflows the maximum argument length for - # subprocess, so we have to use b64, annoyingly. - # Worse, Python3 expects absolutely everything to be a byte, and a plain - # `base64.b64encode()` is still too large, so we have to go through hoops - # of encoding to byte, compressing with lzma, converting to base64 so it - # can be converted to JSON, then all the way back. - - rendered_dashboards = [] - relation_has_invalid_dashboards = False - - for _, (fname, template) in enumerate(templates.items()): - content = None - error = None - topology = template.get("juju_topology", {}) - try: - content = _decode_dashboard_content(template["content"]) - inject_dropdowns = template.get("inject_dropdowns", True) - content = self._manage_dashboard_uid(content, template) - content = _convert_dashboard_fields(content, inject_dropdowns) - - if topology: - content = _inject_labels(content, topology, self._tranformer) - - content = _encode_dashboard_content(content) - except lzma.LZMAError as e: - error = str(e) - relation_has_invalid_dashboards = True - except json.JSONDecodeError as e: - error = str(e.msg) - logger.warning("Invalid JSON in Grafana dashboard: {}".format(fname)) - continue - - # Prepend the relation name and ID to the dashboard ID to avoid clashes with - # multiple relations with apps from the same charm, or having dashboards with - # the same ids inside their charm operators - rendered_dashboards.append( - { - "id": "{}:{}/{}".format(relation.name, relation.id, fname), - "original_id": fname, - "content": content if content else None, - "template": template, - "valid": (error is None), - "error": error, - } - ) - - if relation_has_invalid_dashboards: - self._remove_all_dashboards_for_relation(relation) - - invalid_templates = [ - data["original_id"] for data in rendered_dashboards if not data["valid"] - ] - - logger.warning( - "Cannot add one or more Grafana dashboards from relation '{}:{}': the following " - "templates are invalid: {}".format( - relation.name, - relation.id, - invalid_templates, - ) - ) - - relation.data[self._charm.app]["event"] = json.dumps( - { - "errors": [ - { - "dashboard_id": rendered_dashboard["original_id"], - "error": rendered_dashboard["error"], - } - for rendered_dashboard in rendered_dashboards - if rendered_dashboard["error"] - ] - } - ) - - # Dropping dashboards for a relation needs to be signalled - return True - - stored_data = rendered_dashboards - currently_stored_data = self._get_stored_dashboards(relation.id) - - coerced_data = _type_convert_stored(currently_stored_data) if currently_stored_data else {} - - if not coerced_data == stored_data: - stored_dashboards = self.get_peer_data("dashboards") - stored_dashboards[relation.id] = stored_data - self.set_peer_data("dashboards", stored_dashboards) - return True - return None # type: ignore - - def _manage_dashboard_uid(self, dashboard: str, template: dict) -> str: - """Add an uid to the dashboard if it is not present.""" - dashboard_dict = json.loads(dashboard) - - if not dashboard_dict.get("uid", None) and "dashboard_alt_uid" in template: - dashboard_dict["uid"] = template["dashboard_alt_uid"] - - return json.dumps(dashboard_dict) - - def _remove_all_dashboards_for_relation(self, relation: Relation) -> None: - """If an errored dashboard is in stored data, remove it and trigger a deletion.""" - if self._get_stored_dashboards(relation.id): - stored_dashboards = self.get_peer_data("dashboards") - stored_dashboards.pop(str(relation.id)) - self.set_peer_data("dashboards", stored_dashboards) - self.on.dashboards_changed.emit() # pyright: ignore - - def _to_external_object(self, relation_id, dashboard): - return { - "id": dashboard["original_id"], - "relation_id": relation_id, - "charm": dashboard["template"]["charm"], - "content": _decode_dashboard_content(dashboard["content"]), - } - - @property - def dashboards(self) -> List[Dict]: - """Get a list of known dashboards across all instances of the monitored relation. - - Returns: a list of known dashboards. The JSON of each of the dashboards is available - in the `content` field of the corresponding `dict`. - """ - dashboards = [] - - for _, (relation_id, dashboards_for_relation) in enumerate( - self.get_peer_data("dashboards").items() - ): - for dashboard in dashboards_for_relation: - dashboards.append(self._to_external_object(relation_id, dashboard)) - - return dashboards - - def _get_stored_dashboards(self, relation_id: int) -> list: - """Pull stored dashboards out of the peer data bucket.""" - return self.get_peer_data("dashboards").get(str(relation_id), {}) - - def _set_default_data(self) -> None: - """Set defaults if they are not in peer relation data.""" - data = {"dashboards": {}} # type: ignore - for k, v in data.items(): - if not self.get_peer_data(k): - self.set_peer_data(k, v) - - def set_peer_data(self, key: str, data: Any) -> None: - """Put information into the peer data bucket instead of `StoredState`.""" - self._charm.peers.data[self._charm.app][key] = json.dumps(data) # type: ignore[attr-defined] - - def get_peer_data(self, key: str) -> Any: - """Retrieve information from the peer data bucket instead of `StoredState`.""" - data = self._charm.peers.data[self._charm.app].get(key, "") # type: ignore[attr-defined] - return json.loads(data) if data else {} - - -class GrafanaDashboardAggregator(Object): - """API to retrieve Grafana dashboards from machine dashboards. - - The :class:`GrafanaDashboardAggregator` object provides a way to - collate and aggregate Grafana dashboards from reactive/machine charms - and transport them into Charmed Operators, using Juju topology. - For detailed usage instructions, see the documentation for - :module:`cos-proxy-operator`, as this class is intended for use as a - single point of intersection rather than use in individual charms. - - Since :class:`GrafanaDashboardAggregator` serves as a bridge between - Canonical Observability Stack Charmed Operators and Reactive Charms, - deployed in a Reactive Juju model, both a target relation which is - used to collect events from Reactive charms and a `grafana_relation` - which is used to send the collected data back to the Canonical - Observability Stack are required. - - In its most streamlined usage, :class:`GrafanaDashboardAggregator` is - integrated in a charmed operator as follows: - self.grafana = GrafanaDashboardAggregator(self) - - Args: - charm: a :class:`CharmBase` object which manages this - :class:`GrafanaProvider` object. Generally this is - `self` in the instantiating class. - target_relation: a :string: name of a relation managed by this - :class:`GrafanaDashboardAggregator`, which is used to communicate - with reactive/machine charms it defaults to "dashboards". - grafana_relation: a :string: name of a relation used by this - :class:`GrafanaDashboardAggregator`, which is used to communicate - with charmed grafana. It defaults to "downstream-grafana-dashboard" - """ - - _stored = StoredState() - on = GrafanaProviderEvents() # pyright: ignore - - def __init__( - self, - charm: CharmBase, - target_relation: str = "dashboards", - grafana_relation: str = "downstream-grafana-dashboard", - ): - super().__init__(charm, grafana_relation) - - # Reactive charms may be RPC-ish and not leave reliable data around. Keep - # StoredState here - self._stored.set_default( # type: ignore - dashboard_templates={}, - id_mappings={}, - ) - - self._charm = charm - self._target_relation = target_relation - self._grafana_relation = grafana_relation - - self.framework.observe( - self._charm.on[self._grafana_relation].relation_joined, - self._update_remote_grafana, - ) - self.framework.observe( - self._charm.on[self._grafana_relation].relation_changed, - self._update_remote_grafana, - ) - self.framework.observe( - self._charm.on[self._target_relation].relation_changed, - self.update_dashboards, - ) - self.framework.observe( - self._charm.on[self._target_relation].relation_broken, - self.remove_dashboards, - ) - - def update_dashboards(self, event: RelationEvent) -> None: - """If we get a dashboard from a reactive charm, parse it out and update.""" - if self._charm.unit.is_leader(): - self._upset_dashboards_on_event(event) - - def _upset_dashboards_on_event(self, event: RelationEvent) -> None: - """Update the dashboards in the relation data bucket.""" - dashboards = self._handle_reactive_dashboards(event) - - if not dashboards: - logger.warning( - "Could not find dashboard data after a relation change for {}".format(event.app) - ) - return - - for id in dashboards: - self._stored.dashboard_templates[id] = self._content_to_dashboard_object( # type: ignore - dashboards[id], event - ) - - self._stored.id_mappings[event.app.name] = dashboards # type: ignore - self._update_remote_grafana(event) - - def _update_remote_grafana(self, _: Optional[RelationEvent] = None) -> None: - """Push dashboards to the downstream Grafana relation.""" - # It's still ridiculous to add a UUID here, but needed - stored_data = { - "templates": _type_convert_stored(self._stored.dashboard_templates), # pyright: ignore - "uuid": str(uuid.uuid4()), - } - - if self._charm.unit.is_leader(): - for grafana_relation in self.model.relations[self._grafana_relation]: - grafana_relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) - - def remove_dashboards(self, event: RelationBrokenEvent) -> None: - """Remove a dashboard if the relation is broken.""" - app_ids = _type_convert_stored(self._stored.id_mappings.get(event.app.name, "")) # type: ignore - - if not app_ids: - logger.info("Could not look up stored dashboards for %s", event.app.name) # type: ignore - return - - del self._stored.id_mappings[event.app.name] # type: ignore - for id in app_ids: - del self._stored.dashboard_templates[id] # type: ignore - - stored_data = { - "templates": _type_convert_stored(self._stored.dashboard_templates), # pyright: ignore - "uuid": str(uuid.uuid4()), - } - - if self._charm.unit.is_leader(): - for grafana_relation in self.model.relations[self._grafana_relation]: - grafana_relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) - - # Yes, this has a fair amount of branching. It's not that complex, though - def _strip_existing_datasources(self, dash: dict) -> dict: # noqa: C901 - """Remove existing reactive charm datasource templating out. - - This method iterates through *known* places where reactive charms may set - data in contributed dashboards and removes them. - - `dashboard["__inputs"]` is a property sometimes set when exporting dashboards from - the Grafana UI. It is not present in earlier Grafana versions, and can be disabled - in 5.3.4 and above (optionally). If set, any values present will be substituted on - import. Some reactive charms use this for Prometheus. COS uses dropdown selectors - for datasources, and leaving this present results in "default" datasource values - which are broken. - - Similarly, `dashboard["templating"]["list"][N]["name"] == "host"` can be used to - set a `host` variable for use in dashboards which is not meaningful in the context - of Juju topology and will yield broken dashboards. - - Further properties may be discovered. - """ - try: - if "list" in dash["templating"]: - for i in range(len(dash["templating"]["list"])): - if ( - "datasource" in dash["templating"]["list"][i] - and dash["templating"]["list"][i]["datasource"] is not None - ): - if "Juju" in dash["templating"]["list"][i].get("datasource", ""): - dash["templating"]["list"][i]["datasource"] = r"${prometheusds}" - - # Strip out newly-added 'juju_application' template variables which - # don't line up with our drop-downs - dash_mutable = dash - for i in range(len(dash["templating"]["list"])): - if ( - "name" in dash["templating"]["list"][i] - and dash["templating"]["list"][i].get("name", "") == "app" - ): - del dash_mutable["templating"]["list"][i] - - if dash_mutable: - dash = dash_mutable - except KeyError: - logger.debug("No existing templating data in dashboard") - - if "__inputs" in dash: - inputs = dash - for i in range(len(dash["__inputs"])): - if dash["__inputs"][i].get("pluginName", "") == "Prometheus": - del inputs["__inputs"][i] - if inputs: - dash["__inputs"] = inputs["__inputs"] - else: - del dash["__inputs"] - - return dash - - def _handle_reactive_dashboards(self, event: RelationEvent) -> Optional[Dict]: - """Look for a dashboard in relation data (during a reactive hook) or builtin by name.""" - if not self._charm.unit.is_leader(): - return {} - - templates = [] - id = "" - - # Reactive data can reliably be pulled out of events. In theory, if we got an event, - # it's on the bucket, but using event explicitly keeps the mental model in - # place for reactive - for k in event.relation.data[event.unit].keys(): # type: ignore - if k.startswith("request_"): - templates.append(json.loads(event.relation.data[event.unit][k])["dashboard"]) # type: ignore - - for k in event.relation.data[event.app].keys(): # type: ignore - if k.startswith("request_"): - templates.append(json.loads(event.relation.data[event.app][k])["dashboard"]) # type: ignore - - builtins = self._maybe_get_builtin_dashboards(event) - - if not templates and not builtins: - logger.warning("NOTHING!") - return {} - - dashboards = {} - for t in templates: - # This seems ridiculous, too, but to get it from a "dashboards" key in serialized JSON - # in the bucket back out to the actual "dashboard" we _need_, this is the way - # This is not a mistake -- there's a double nesting in reactive charms, and - # Grafana won't load it. We have to unbox: - # event.relation.data[event.]["request_*"]["dashboard"]["dashboard"], - # and the final unboxing is below. - # - # Apparently SOME newer dashboards (such as Ceph) do not have this double nesting, so - # now we get to account for both :toot: - dash = t.get("dashboard", {}) or t - - # Replace values with LMA-style templating - dash = self._strip_existing_datasources(dash) - dash = json.dumps(dash) - - # Replace the old-style datasource templates - dash = re.sub(r"<< datasource >>", r"${prometheusds}", dash) - dash = re.sub(r'"datasource": "prom.*?"', r'"datasource": "${prometheusds}"', dash) - dash = re.sub( - r'"datasource": "\$datasource"', r'"datasource": "${prometheusds}"', dash - ) - dash = re.sub(r'"uid": "\$datasource"', r'"uid": "${prometheusds}"', dash) - dash = re.sub( - r'"datasource": "(!?\w)[\w|\s|-]+?Juju generated.*?"', - r'"datasource": "${prometheusds}"', - dash, - ) - - # Yank out "new"+old LMA topology - dash = re.sub( - r'(,?\s?juju_application=~)\\"\$app\\"', r'\1\\"$juju_application\\"', dash - ) - - # Replace old piechart panels - dash = re.sub(r'"type": "grafana-piechart-panel"', '"type": "piechart"', dash) - - from jinja2 import DebugUndefined, Template - - content = _encode_dashboard_content( - Template(dash, undefined=DebugUndefined).render(datasource=r"${prometheusds}") # type: ignore - ) - id = "prog:{}".format(content[-24:-16]) - - dashboards[id] = content - return {**builtins, **dashboards} - - def _maybe_get_builtin_dashboards(self, event: RelationEvent) -> Dict: - """Tries to match the event with an included dashboard. - - Scans dashboards packed with the charm instantiating this class, and tries to match - one with the event. There is no guarantee that any given event will match a builtin, - since each charm instantiating this class may include a different set of dashboards, - or none. - """ - builtins = {} - dashboards_path = None - - try: - dashboards_path = _resolve_dir_against_charm_path( - self._charm, "src/grafana_dashboards" - ) - except InvalidDirectoryPathError as e: - logger.warning( - "Invalid Grafana dashboards folder at %s: %s", - e.grafana_dashboards_absolute_path, - e.message, - ) - - if dashboards_path: - - def is_dashboard(p: Path) -> bool: - return p.is_file() and p.name.endswith((".json", ".json.tmpl", ".tmpl")) - - for path in filter(is_dashboard, Path(dashboards_path).glob("*")): - # path = Path(path) - if event.app.name in path.name: # type: ignore - id = "file:{}".format(path.stem) - builtins[id] = self._content_to_dashboard_object( - _encode_dashboard_content(path.read_bytes()), event - ) - - return builtins - - def _content_to_dashboard_object(self, content: str, event: RelationEvent) -> Dict: - return { - "charm": event.app.name, # type: ignore - "content": content, - "juju_topology": self._juju_topology(event), - "inject_dropdowns": True, - } - - # This is not actually used in the dashboards, but is present to provide a secondary - # salt to ensure uniqueness in the dict keys in case individual charm units provide - # dashboards - def _juju_topology(self, event: RelationEvent) -> Dict: - return { - "model": self._charm.model.name, - "model_uuid": self._charm.model.uuid, - "application": event.app.name, # type: ignore - "unit": event.unit.name, # type: ignore - } - - -class CosTool: - """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" - - _path = None - _disabled = False - - def __init__(self, charm): - self._charm = charm - - @property - def path(self): - """Lazy lookup of the path of cos-tool.""" - if self._disabled: - return None - if not self._path: - self._path = self._get_tool_path() - if not self._path: - logger.debug("Skipping injection of juju topology as label matchers") - self._disabled = True - return self._path - - def apply_label_matchers(self, rules: dict, type: str) -> dict: - """Will apply label matchers to the expression of all alerts in all supplied groups.""" - if not self.path: - return rules - for group in rules["groups"]: - rules_in_group = group.get("rules", []) - for rule in rules_in_group: - topology = {} - # if the user for some reason has provided juju_unit, we'll need to honor it - # in most cases, however, this will be empty - for label in [ - "juju_model", - "juju_model_uuid", - "juju_application", - "juju_charm", - "juju_unit", - ]: - if label in rule["labels"]: - topology[label] = rule["labels"][label] - - rule["expr"] = self.inject_label_matchers(rule["expr"], topology, type) - return rules - - def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: - """Will validate correctness of alert rules, returning a boolean and any errors.""" - if not self.path: - logger.debug("`cos-tool` unavailable. Not validating alert correctness.") - return True, "" - - with tempfile.TemporaryDirectory() as tmpdir: - rule_path = Path(tmpdir + "/validate_rule.yaml") - - # Smash "our" rules format into what upstream actually uses, which is more like: - # - # groups: - # - name: foo - # rules: - # - alert: SomeAlert - # expr: up - # - alert: OtherAlert - # expr: up - transformed_rules = {"groups": []} # type: ignore - for rule in rules["groups"]: - transformed = {"name": str(uuid.uuid4()), "rules": [rule]} - transformed_rules["groups"].append(transformed) - - rule_path.write_text(yaml.dump(transformed_rules)) - - args = [str(self.path), "validate", str(rule_path)] - # noinspection PyBroadException - try: - self._exec(args) - return True, "" - except subprocess.CalledProcessError as e: - logger.debug("Validating the rules failed: %s", e.output) - return False, ", ".join([line for line in e.output if "error validating" in line]) - - def inject_label_matchers(self, expression: str, topology: dict, type: str) -> str: - """Add label matchers to an expression.""" - if not topology: - return expression - if not self.path: - logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) - return expression - args = [str(self.path), "--format", type, "transform"] - - variable_topology = {k: "${}".format(k) for k in topology.keys()} - args.extend( - [ - "--label-matcher={}={}".format(key, value) - for key, value in variable_topology.items() - ] - ) - - # Pass a leading "--" so expressions with a negation or subtraction aren't interpreted as - # flags - args.extend(["--", "{}".format(expression)]) - # noinspection PyBroadException - try: - return re.sub(r'="\$juju', r'=~"$juju', self._exec(args)) - except subprocess.CalledProcessError as e: - logger.debug('Applying the expression failed: "%s", falling back to the original', e) - return expression - - def _get_tool_path(self) -> Optional[Path]: - arch = platform.machine() - arch = "amd64" if arch == "x86_64" else arch - res = "cos-tool-{}".format(arch) - try: - path = Path(res).resolve() - path.chmod(0o777) - return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: - logger.debug('Could not locate cos-tool at: "{}"'.format(res)) - return None - - def _exec(self, cmd) -> str: - result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) - output = result.stdout.decode("utf-8").strip() - return output diff --git a/charm/lib/charms/loki_k8s/v0/loki_push_api.py b/charm/lib/charms/loki_k8s/v0/loki_push_api.py deleted file mode 100644 index 003df3c..0000000 --- a/charm/lib/charms/loki_k8s/v0/loki_push_api.py +++ /dev/null @@ -1,2499 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. -# -# Learn more at: https://juju.is/docs/sdk - -r"""## Overview. - -This document explains how to use the two principal objects this library provides: - -- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to -implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm. -The provider side of the relation represents the server side, to which logs are being pushed. - -- `LokiPushApiConsumer`: Used to obtain the loki api endpoint. This is useful for configuring - applications such as pebble, or charmed operators of workloads such as grafana-agent or promtail, - that can communicate with loki directly. - -- `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to -send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the -`loki_push_api` relation interface. - -Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju -topology labels are used to uniquely identify the workload which generates telemetry like logs. - -In order to be able to control the labels on the logs pushed this object adds a Pebble layer -that runs Promtail in the workload container, injecting Juju topology labels into the -logs on the fly. - -## LokiPushApiProvider Library Usage - -This object may be used by any Charmed Operator which implements the `loki_push_api` interface. -For instance, Loki or Grafana Agent. - -For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory -and three optional arguments. - -- `charm`: A reference to the parent (Loki) charm. - -- `relation_name`: The name of the relation that the charm uses to interact - with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. - - If provided, this relation name must match a provided relation in metadata.yaml with the - `loki_push_api` interface. - - The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for - `LogProxyConsumer`. - - For example, a provider's `metadata.yaml` file may look as follows: - - ```yaml - provides: - logging: - interface: loki_push_api - ``` - - Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as - follows: - - from charms.loki_k8s.v0.loki_push_api import LokiPushApiProvider - from loki_server import LokiServer - ... - - class LokiOperatorCharm(CharmBase): - ... - - def __init__(self, *args): - super().__init__(*args) - ... - external_url = urlparse(self._external_url) - self.loki_provider = LokiPushApiProvider( - self, - address=external_url.hostname or self.hostname, - port=external_url.port or 80, - scheme=external_url.scheme, - path=f"{external_url.path}/loki/api/v1/push", - ) - ... - - - `port`: Loki Push Api endpoint port. Default value: `3100`. - - `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP` - - `address`: Loki Push Api endpoint address. Default value: `localhost` - - `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push` - - -The `LokiPushApiProvider` object has several responsibilities: - -1. Set the URL of the Loki Push API in the relation application data bag; the URL - must be unique to all instances (e.g. using a load balancer). - -2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use - `LogProxyConsumer` object could download and configure it. - -3. Process the metadata of the consumer application, provided via the - "metadata" field of the consumer data bag, which are used to annotate the - alert rules (see next point). An example for "metadata" is the following: - - {'model': 'loki', - 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', - 'application': 'promtail-k8s' - } - -4. Process alert rules set into the relation by the `LokiPushApiConsumer` - objects, e.g.: - - '{ - "groups": [{ - "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", - "rules": [{ - "alert": "HighPercentageError", - "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) - by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) - by (job)\\n > 0.05 - \\n", "for": "10m", - "labels": { - "severity": "page", - "juju_model": "loki", - "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", - "juju_application": "promtail-k8s" - }, - "annotations": { - "summary": "High request latency" - } - }] - }] - }' - - -Once these alert rules are sent over relation data, the `LokiPushApiProvider` object -stores these files in the directory `/loki/rules` inside the Loki charm container. After -storing alert rules files, the object will check alert rules by querying Loki API -endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). -If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will -be emitted with details about the `RelationEvent` which triggered it. - -This events should be observed in the charm that uses `LokiPushApiProvider`: - -```python - def __init__(self, *args): - super().__init__(*args) - ... - self.loki_provider = LokiPushApiProvider(self) - self.framework.observe( - self.loki_provider.on.loki_push_api_alert_rules_changed, - self._loki_push_api_alert_rules_changed, - ) -``` - - -## LokiPushApiConsumer Library Usage - -This Loki charm interacts with its clients using the Loki charm library. Charms -seeking to send log to Loki, must do so using the `LokiPushApiConsumer` object from -this charm library. - -> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. -> -> Ensure sure you `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` -> when using this library. - -For the simplest use cases, using the `LokiPushApiConsumer` object only requires -instantiating it, typically in the constructor of your charm (the one which -sends logs). - -```python -from charms.loki_k8s.v0.loki_push_api import LokiPushApiConsumer - -class LokiClientCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - ... - self._loki_consumer = LokiPushApiConsumer(self) -``` - -The `LokiPushApiConsumer` constructor requires two things: - -- A reference to the parent (LokiClientCharm) charm. - -- Optionally, the name of the relation that the Loki charm uses to interact - with its clients. If provided, this relation name must match a required - relation in metadata.yaml with the `loki_push_api` interface. - - This argument is not required if your metadata.yaml has precisely one - required relation in metadata.yaml with the `loki_push_api` interface, as the - lib will automatically resolve the relation name inspecting the using the - meta information of the charm - -Any time the relation between a Loki provider charm and a Loki consumer charm is -established, a `LokiPushApiEndpointJoined` event is fired. In the consumer side -is it possible to observe this event with: - -```python - -self.framework.observe( - self._loki_consumer.on.loki_push_api_endpoint_joined, - self._on_loki_push_api_endpoint_joined, -) -``` - -Any time there are departures in relations between the consumer charm and Loki -the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: - -```python -self.framework.observe( - self._loki_consumer.on.loki_push_api_endpoint_departed, - self._on_loki_push_api_endpoint_departed, -) -``` - -The consumer charm can then choose to update its configuration in both situations. - -Note that LokiPushApiConsumer does not add any labels automatically on its own. In -order to better integrate with the Canonical Observability Stack, you may want to configure your -software to add Juju topology labels. The -[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology -labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how -to do this with promtail. - -## LogProxyConsumer Library Usage - -Let's say that we have a workload charm that produces logs, and we need to send those logs to a -workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. - -Adopting this object in a Charmed Operator consist of two steps: - -1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed - operator. There are two ways to get logs in to promtail. You can give it a list of files to - read, or you can write to it using the syslog protocol. - - For example: - - ```python - from charms.loki_k8s.v0.loki_push_api import LogProxyConsumer - - ... - - def __init__(self, *args): - ... - self._log_proxy = LogProxyConsumer( - charm=self, log_files=LOG_FILES, container_name=PEER, enable_syslog=True - ) - - self.framework.observe( - self._log_proxy.on.promtail_digest_error, - self._promtail_error, - ) - - def _promtail_error(self, event): - logger.error(event.message) - self.unit.status = BlockedStatus(event.message) - ``` - - Any time the relation between a provider charm and a LogProxy consumer charm is - established, a `LogProxyEndpointJoined` event is fired. In the consumer side is it - possible to observe this event with: - - ```python - - self.framework.observe( - self._log_proxy.on.log_proxy_endpoint_joined, - self._on_log_proxy_endpoint_joined, - ) - ``` - - Any time there are departures in relations between the consumer charm and the provider - the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: - - ```python - self.framework.observe( - self._log_proxy.on.log_proxy_endpoint_departed, - self._on_log_proxy_endpoint_departed, - ) - ``` - - The consumer charm can then choose to update its configuration in both situations. - - Note that: - - - `LOG_FILES` is a `list` containing the log files we want to send to `Loki` or - `Grafana Agent`, for instance: - - ```python - LOG_FILES = [ - "/var/log/apache2/access.log", - "/var/log/alternatives.log", - ] - ``` - - - `container_name` is the name of the container in which the application is running. - If in the Pod there is only one container, this argument can be omitted. - - - You can configure your syslog software using `localhost` as the address and the method - `LogProxyConsumer.syslog_port` to get the port, or, alternatively, if you are using rsyslog - you may use the method `LogProxyConsumer.rsyslog_config()`. - -2. Modify the `metadata.yaml` file to add: - - - The `log-proxy` relation in the `requires` section: - ```yaml - requires: - log-proxy: - interface: loki_push_api - optional: true - ``` - -Once the library is implemented in a Charmed Operator and a relation is established with -the charm that implements the `loki_push_api` interface, the library will inject a -Pebble layer that runs Promtail in the workload container to send logs. - -By default, the promtail binary injected into the container will be downloaded from the internet. -If, for any reason, the container has limited network access, you may allow charm administrators -to provide their own promtail binary at runtime by adding the following snippet to your charm -metadata: - -```yaml -resources: - promtail-bin: - type: file - description: Promtail binary for logging - filename: promtail-linux -``` - -Which would then allow operators to deploy the charm this way: - -``` -juju deploy \ - ./your_charm.charm \ - --resource promtail-bin=/tmp/promtail-linux-amd64 -``` - -If a different resource name is used, it can be specified with the `promtail_resource_name` -argument to the `LogProxyConsumer` constructor. - -The object can emit a `PromtailDigestError` event: - -- Promtail binary cannot be downloaded. -- The sha256 sum mismatch for promtail binary. - -The object can raise a `ContainerNotFoundError` event: - -- No `container_name` parameter has been specified and the Pod has more than 1 container. - -These can be monitored via the PromtailDigestError events via: - -```python - self.framework.observe( - self._loki_consumer.on.promtail_digest_error, - self._promtail_error, - ) - - def _promtail_error(self, event): - logger.error(msg) - self.unit.status = BlockedStatus(event.message) - ) -``` - -## Alerting Rules - -This charm library also supports gathering alerting rules from all related Loki client -charms and enabling corresponding alerts within the Loki charm. Alert rules are -automatically gathered by `LokiPushApiConsumer` object from a directory conventionally -named `loki_alert_rules`. - -This directory must reside at the top level in the `src` folder of the -consumer charm. Each file in this directory is assumed to be a single alert rule -in YAML format. The file name must have the `.rule` extension. -The format of this alert rule conforms to the -[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). - -An example of the contents of one such file is shown below. - -```yaml -alert: HighPercentageError -expr: | - sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) - / - sum(rate({%%juju_topology%%}[5m])) by (job) - > 0.05 -for: 10m -labels: - severity: page -annotations: - summary: High request latency - -``` - -It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert -rule shown above. This filter is a stub that is automatically replaced by the -`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its -UUID). Such a topology filter is essential to ensure that alert rules submitted by one -provider charm generates alerts only for that same charm. - -The Loki charm may be related to multiple Loki client charms. Without this, filter -rules submitted by one provider charm will also result in corresponding alerts for other -provider charms. Hence, every alert rule expression must include such a topology filter stub. - -Gathering alert rules and generating rule files within the Loki charm is easily done using -the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically -include Juju topology labels in the alerts. These labels indicate the source of the alert. - -The following labels are automatically added to every alert - -- `juju_model` -- `juju_model_uuid` -- `juju_application` - - -Whether alert rules files does not contain the keys `alert` or `expr` or there is no alert -rules file in `alert_rules_path` a `loki_push_api_alert_rules_error` event is emitted. - -To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: - -```python -class LokiClientCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - ... - self._loki_consumer = LokiPushApiConsumer(self) - - self.framework.observe( - self._loki_consumer.on.loki_push_api_alert_rules_error, - self._alert_rules_error - ) - - def _alert_rules_error(self, event): - self.unit.status = BlockedStatus(event.message) -``` - -## Relation Data - -The Loki charm uses both application and unit relation data to obtain information regarding -Loki Push API and alert rules. - -Units of consumer charm send their alert rules over app relation data using the `alert_rules` -key. -""" - -import json -import logging -import os -import platform -import re -import socket -import subprocess -import tempfile -import typing -from copy import deepcopy -from gzip import GzipFile -from hashlib import sha256 -from io import BytesIO -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union, cast -from urllib import request -from urllib.error import HTTPError - -import yaml -from cosl import JujuTopology -from ops.charm import ( - CharmBase, - HookEvent, - RelationBrokenEvent, - RelationCreatedEvent, - RelationDepartedEvent, - RelationEvent, - RelationJoinedEvent, - RelationRole, - WorkloadEvent, -) -from ops.framework import EventBase, EventSource, Object, ObjectEvents -from ops.model import Container, ModelError, Relation -from ops.pebble import APIError, ChangeError, PathError, ProtocolError - -# The unique Charmhub library identifier, never change it -LIBID = "bf76f23cdd03464b877c52bd1d2f563e" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 28 - -logger = logging.getLogger(__name__) - -RELATION_INTERFACE_NAME = "loki_push_api" -DEFAULT_RELATION_NAME = "logging" -DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" -DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" - -PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" -# To update Promtail version you only need to change the PROMTAIL_VERSION and -# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture -# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. -PROMTAIL_VERSION = "v2.5.0" -PROMTAIL_BINARIES = { - "amd64": { - "filename": "promtail-static-amd64", - "zipsha": "543e333b0184e14015a42c3c9e9e66d2464aaa66eca48b29e185a6a18f67ab6d", - "binsha": "17e2e271e65f793a9fbe81eab887b941e9d680abe82d5a0602888c50f5e0cac9", - }, -} - -# Paths in `charm` container -BINARY_DIR = "/tmp" - -# Paths in `workload` container -WORKLOAD_BINARY_DIR = "/opt/promtail" -WORKLOAD_CONFIG_DIR = "/etc/promtail" -WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" -WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) -WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) -WORKLOAD_SERVICE_NAME = "promtail" - -HTTP_LISTEN_PORT = 9080 -GRPC_LISTEN_PORT = 9095 - - -class RelationNotFoundError(ValueError): - """Raised if there is no relation with the given name.""" - - def __init__(self, relation_name: str): - self.relation_name = relation_name - self.message = "No relation named '{}' found".format(relation_name) - - super().__init__(self.message) - - -class RelationInterfaceMismatchError(Exception): - """Raised if the relation with the given name has a different interface.""" - - def __init__( - self, - relation_name: str, - expected_relation_interface: str, - actual_relation_interface: str, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_interface - self.actual_relation_interface = actual_relation_interface - self.message = ( - "The '{}' relation has '{}' as interface rather than the expected '{}'".format( - relation_name, actual_relation_interface, expected_relation_interface - ) - ) - super().__init__(self.message) - - -class RelationRoleMismatchError(Exception): - """Raised if the relation with the given name has a different direction.""" - - def __init__( - self, - relation_name: str, - expected_relation_role: RelationRole, - actual_relation_role: RelationRole, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_role - self.actual_relation_role = actual_relation_role - self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( - relation_name, repr(actual_relation_role), repr(expected_relation_role) - ) - super().__init__(self.message) - - -def _validate_relation_by_interface_and_direction( - charm: CharmBase, - relation_name: str, - expected_relation_interface: str, - expected_relation_role: RelationRole, -): - """Verifies that a relation has the necessary characteristics. - - Verifies that the `relation_name` provided: (1) exists in metadata.yaml, - (2) declares as interface the interface name passed as `relation_interface` - and (3) has the right "direction", i.e., it is a relation that `charm` - provides or requires. - - Args: - charm: a `CharmBase` object to scan for the matching relation. - relation_name: the name of the relation to be verified. - expected_relation_interface: the interface name to be matched by the - relation named `relation_name`. - expected_relation_role: whether the `relation_name` must be either - provided or required by `charm`. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the same relation interface - as specified via the `expected_relation_interface` argument. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the same role as specified - via the `expected_relation_role` argument. - """ - if relation_name not in charm.meta.relations: - raise RelationNotFoundError(relation_name) - - relation = charm.meta.relations[relation_name] - - actual_relation_interface = relation.interface_name - if actual_relation_interface != expected_relation_interface: - raise RelationInterfaceMismatchError( - relation_name, - expected_relation_interface, - actual_relation_interface, # pyright: ignore - ) - - if expected_relation_role == RelationRole.provides: - if relation_name not in charm.meta.provides: - raise RelationRoleMismatchError( - relation_name, RelationRole.provides, RelationRole.requires - ) - elif expected_relation_role == RelationRole.requires: - if relation_name not in charm.meta.requires: - raise RelationRoleMismatchError( - relation_name, RelationRole.requires, RelationRole.provides - ) - else: - raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) - - -class InvalidAlertRulePathError(Exception): - """Raised if the alert rules folder cannot be found or is otherwise invalid.""" - - def __init__( - self, - alert_rules_absolute_path: Path, - message: str, - ): - self.alert_rules_absolute_path = alert_rules_absolute_path - self.message = message - - super().__init__(self.message) - - -def _is_official_alert_rule_format(rules_dict: dict) -> bool: - """Are alert rules in the upstream format as supported by Loki. - - Alert rules in dictionary format are in "official" form if they - contain a "groups" key, since this implies they contain a list of - alert rule groups. - - Args: - rules_dict: a set of alert rules in Python dictionary format - - Returns: - True if alert rules are in official Loki file format. - """ - return "groups" in rules_dict - - -def _is_single_alert_rule_format(rules_dict: dict) -> bool: - """Are alert rules in single rule format. - - The Loki charm library supports reading of alert rules in a - custom format that consists of a single alert rule per file. This - does not conform to the official Loki alert rule file format - which requires that each alert rules file consists of a list of - alert rule groups and each group consists of a list of alert - rules. - - Alert rules in dictionary form are considered to be in single rule - format if in the least it contains two keys corresponding to the - alert rule name and alert expression. - - Returns: - True if alert rule is in single rule file format. - """ - # one alert rule per file - return set(rules_dict) >= {"alert", "expr"} - - -class AlertRules: - """Utility class for amalgamating Loki alert rule files and injecting juju topology. - - An `AlertRules` object supports aggregating alert rules from files and directories in both - official and single rule file formats using the `add_path()` method. All the alert rules - read are annotated with Juju topology labels and amalgamated into a single data structure - in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be - easily dumped into JSON format and exchanged over relation data. The dictionary can also - be dumped into YAML format and written directly into an alert rules file that is read by - Loki. Note that multiple `AlertRules` objects must not be written into the same file, - since Loki allows only a single list of alert rule groups per alert rules file. - - The official Loki format is a YAML file conforming to the Loki documentation - (https://grafana.com/docs/loki/latest/api/#list-rule-groups). - The custom single rule format is a subsection of the official YAML, having a single alert - rule, effectively "one alert per file". - """ - - # This class uses the following terminology for the various parts of a rule file: - # - alert rules file: the entire groups[] yaml, including the "groups:" key. - # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list - # of dictionaries that have the "name" and "rules" keys. - # - alert group (singular): a single dictionary that has the "name" and "rules" keys. - # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with - # the "alert" and "expr" keys. - # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. - - def __init__(self, topology: Optional[JujuTopology] = None): - """Build and alert rule object. - - Args: - topology: a `JujuTopology` instance that is used to annotate all alert rules. - """ - self.topology = topology - self.tool = CosTool(None) - self.alert_groups = [] # type: List[dict] - - def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: - """Read a rules file from path, injecting juju topology. - - Args: - root_path: full path to the root rules folder (used only for generating group name) - file_path: full path to a *.rule file. - - Returns: - A list of dictionaries representing the rules file, if file is valid (the structure is - formed by `yaml.safe_load` of the file); an empty list otherwise. - """ - with file_path.open() as rf: - # Load a list of rules from file then add labels and filters - try: - rule_file = yaml.safe_load(rf) or {} - - except Exception as e: - logger.error("Failed to read alert rules from %s: %s", file_path.name, e) - return [] - - if _is_official_alert_rule_format(rule_file): - alert_groups = rule_file["groups"] - elif _is_single_alert_rule_format(rule_file): - # convert to list of alert groups - # group name is made up from the file name - alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] - else: - # invalid/unsupported - reason = "file is empty" if not rule_file else "unexpected file structure" - logger.error("Invalid rules file (%s): %s", reason, file_path.name) - return [] - - # update rules with additional metadata - for alert_group in alert_groups: - # update group name with topology and sub-path - alert_group["name"] = self._group_name( - str(root_path), - str(file_path), - alert_group["name"], - ) - - # add "juju_" topology labels - for alert_rule in alert_group["rules"]: - if "labels" not in alert_rule: - alert_rule["labels"] = {} - - if self.topology: - alert_rule["labels"].update(self.topology.label_matcher_dict) - # insert juju topology filters into a prometheus alert rule - # logql doesn't like empty matchers, so add a job matcher which hits - # any string as a "wildcard" which the topology labels will - # filter down - alert_rule["expr"] = self.tool.inject_label_matchers( - re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]), - self.topology.label_matcher_dict, - ) - - return alert_groups - - def _group_name( - self, - root_path: typing.Union[Path, str], - file_path: typing.Union[Path, str], - group_name: str, - ) -> str: - """Generate group name from path and topology. - - The group name is made up of the relative path between the root dir_path, the file path, - and topology identifier. - - Args: - root_path: path to the root rules dir. - file_path: path to rule file. - group_name: original group name to keep as part of the new augmented group name - - Returns: - New group name, augmented by juju topology and relative path. - """ - file_path = Path(file_path) if not isinstance(file_path, Path) else file_path - root_path = Path(root_path) if not isinstance(root_path, Path) else root_path - rel_path = file_path.parent.relative_to(root_path.as_posix()) - - # We should account for both absolute paths and Windows paths. Convert it to a POSIX - # string, strip off any leading /, then join it - - path_str = "" - if not rel_path == Path("."): - # Get rid of leading / and optionally drive letters so they don't muck up - # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...' - # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's - # optional, but it makes it clear what we're doing. - - # Note that Path doesn't actually care whether the path is valid just to instantiate - # the object, so we can happily strip that stuff out to make templating nicer - rel_path = Path( - re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix()) - if rel_path.is_absolute() - else str(rel_path) - ) - - # Get rid of relative path characters in the middle which both os.path and pathlib - # leave hanging around. We could use path.resolve(), but that would lead to very - # long template strings when rules come from pods and/or other deeply nested charm - # paths - path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts)) - - # Generate group name: - # - name, from juju topology - # - suffix, from the relative path of the rule file; - group_name_parts = [self.topology.identifier] if self.topology else [] - group_name_parts.extend([path_str, group_name, "alerts"]) - # filter to remove empty strings - return "_".join(filter(lambda x: x, group_name_parts)) - - @classmethod - def _multi_suffix_glob( - cls, dir_path: Path, suffixes: List[str], recursive: bool = True - ) -> list: - """Helper function for getting all files in a directory that have a matching suffix. - - Args: - dir_path: path to the directory to glob from. - suffixes: list of suffixes to include in the glob (items should begin with a period). - recursive: a flag indicating whether a glob is recursive (nested) or not. - - Returns: - List of files in `dir_path` that have one of the suffixes specified in `suffixes`. - """ - all_files_in_dir = dir_path.glob("**/*" if recursive else "*") - return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) - - def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: - """Read all rule files in a directory. - - All rules from files for the same directory are loaded into a single - group. The generated name of this group includes juju topology. - By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. - - Args: - dir_path: directory containing *.rule files (alert rules without groups). - recursive: flag indicating whether to scan for rule files recursively. - - Returns: - a list of dictionaries representing prometheus alert rule groups, each dictionary - representing an alert group (structure determined by `yaml.safe_load`). - """ - alert_groups = [] # type: List[dict] - - # Gather all alerts into a list of groups - for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive): - alert_groups_from_file = self._from_file(dir_path, file_path) - if alert_groups_from_file: - logger.debug("Reading alert rule from %s", file_path) - alert_groups.extend(alert_groups_from_file) - - return alert_groups - - def add_path(self, path_str: str, *, recursive: bool = False): - """Add rules from a dir path. - - All rules from files are aggregated into a data structure representing a single rule file. - All group names are augmented with juju topology. - - Args: - path_str: either a rules file or a dir of rules files. - recursive: whether to read files recursively or not (no impact if `path` is a file). - - Raises: - InvalidAlertRulePathError: if the provided path is invalid. - """ - path = Path(path_str) # type: Path - if path.is_dir(): - self.alert_groups.extend(self._from_dir(path, recursive)) - elif path.is_file(): - self.alert_groups.extend(self._from_file(path.parent, path)) - else: - logger.debug("The alerts file does not exist: %s", path) - - def as_dict(self) -> dict: - """Return standard alert rules file in dict representation. - - Returns: - a dictionary containing a single list of alert rule groups. - The list of alert rule groups is provided as value of the - "groups" dictionary key. - """ - return {"groups": self.alert_groups} if self.alert_groups else {} - - -def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: - """Resolve the provided path items against the directory of the main file. - - Look up the directory of the `main.py` file being executed. This is normally - going to be the charm.py file of the charm including this library. Then, resolve - the provided path elements and, if the result path exists and is a directory, - return its absolute path; otherwise, raise en exception. - - Raises: - InvalidAlertRulePathError, if the path does not exist or is not a directory. - """ - charm_dir = Path(str(charm.charm_dir)) - if not charm_dir.exists() or not charm_dir.is_dir(): - # Operator Framework does not currently expose a robust - # way to determine the top level charm source directory - # that is consistent across deployed charms and unit tests - # Hence for unit tests the current working directory is used - # TODO: updated this logic when the following ticket is resolved - # https://github.com/canonical/operator/issues/643 - charm_dir = Path(os.getcwd()) - - alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) - - if not alerts_dir_path.exists(): - raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") - if not alerts_dir_path.is_dir(): - raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") - - return str(alerts_dir_path) - - -class NoRelationWithInterfaceFoundError(Exception): - """No relations with the given interface are found in the charm meta.""" - - def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None): - self.charm = charm - self.relation_interface = relation_interface - self.message = ( - "No relations with interface '{}' found in the meta of the '{}' charm".format( - relation_interface, charm.meta.name - ) - ) - - super().__init__(self.message) - - -class MultipleRelationsWithInterfaceFoundError(Exception): - """Multiple relations with the given interface are found in the charm meta.""" - - def __init__(self, charm: CharmBase, relation_interface: str, relations: list): - self.charm = charm - self.relation_interface = relation_interface - self.relations = relations - self.message = ( - "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format( - relation_interface, charm.meta.name - ) - ) - super().__init__(self.message) - - -class LokiPushApiEndpointDeparted(EventBase): - """Event emitted when Loki departed.""" - - -class LokiPushApiEndpointJoined(EventBase): - """Event emitted when Loki joined.""" - - -class LokiPushApiAlertRulesChanged(EventBase): - """Event emitted if there is a change in the alert rules.""" - - def __init__(self, handle, relation, relation_id, app=None, unit=None): - """Pretend we are almost like a RelationEvent. - - Fields to serialize: - { - "relation_name": , - "relation_id": , - "app_name": , - "unit_name": - } - - In this way, we can transparently use `RelationEvent.snapshot()` to pass - it back if we need to log it. - """ - super().__init__(handle) - self.relation = relation - self.relation_id = relation_id - self.app = app - self.unit = unit - - def snapshot(self) -> Dict: - """Save event information.""" - if not self.relation: - return {} - snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id} - if self.app: - snapshot["app_name"] = self.app.name - if self.unit: - snapshot["unit_name"] = self.unit.name - return snapshot - - def restore(self, snapshot: dict): - """Restore event information.""" - self.relation = self.framework.model.get_relation( - snapshot["relation_name"], snapshot["relation_id"] - ) - app_name = snapshot.get("app_name") - if app_name: - self.app = self.framework.model.get_app(app_name) - else: - self.app = None - unit_name = snapshot.get("unit_name") - if unit_name: - self.unit = self.framework.model.get_unit(unit_name) - else: - self.unit = None - - -class InvalidAlertRuleEvent(EventBase): - """Event emitted when alert rule files are not parsable. - - Enables us to set a clear status on the provider. - """ - - def __init__(self, handle, errors: str = "", valid: bool = False): - super().__init__(handle) - self.errors = errors - self.valid = valid - - def snapshot(self) -> Dict: - """Save alert rule information.""" - return { - "valid": self.valid, - "errors": self.errors, - } - - def restore(self, snapshot): - """Restore alert rule information.""" - self.valid = snapshot["valid"] - self.errors = snapshot["errors"] - - -class LokiPushApiEvents(ObjectEvents): - """Event descriptor for events raised by `LokiPushApiProvider`.""" - - loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted) - loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined) - loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged) - alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) - - -class LokiPushApiProvider(Object): - """A LokiPushApiProvider class.""" - - on = LokiPushApiEvents() # pyright: ignore - - def __init__( - self, - charm, - relation_name: str = DEFAULT_RELATION_NAME, - *, - port: Union[str, int] = 3100, - scheme: str = "http", - address: str = "localhost", - path: str = "loki/api/v1/push", - ): - """A Loki service provider. - - Args: - charm: a `CharmBase` instance that manages this - instance of the Loki service. - relation_name: an optional string name of the relation between `charm` - and the Loki charmed service. The default is "logging". - It is strongly advised not to change the default, so that people - deploying your charm will have a consistent experience with all - other charms that consume metrics endpoints. - port: an optional port of the Loki service (default is "3100"). - scheme: an optional scheme of the Loki API URL (default is "http"). - address: an optional address of the Loki service (default is "localhost"). - path: an optional path of the Loki API URL (default is "loki/api/v1/push") - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the `loki_push_api` relation - interface. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the `RelationRole.requires` - role. - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides - ) - super().__init__(charm, relation_name) - self._charm = charm - self._relation_name = relation_name - self._tool = CosTool(self) - self.port = int(port) - self.scheme = scheme - self.address = address - self.path = path - - events = self._charm.on[relation_name] - self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) - self.framework.observe(events.relation_joined, self._on_logging_relation_joined) - self.framework.observe(events.relation_changed, self._on_logging_relation_changed) - self.framework.observe(events.relation_departed, self._on_logging_relation_departed) - self.framework.observe(events.relation_broken, self._on_logging_relation_broken) - - def _on_lifecycle_event(self, _): - # Upgrade event or other charm-level event - should_update = False - for relation in self._charm.model.relations[self._relation_name]: - # Don't accidentally flip a True result back. - should_update = should_update or self._process_logging_relation_changed(relation) - if should_update: - # We don't have a RelationEvent, so build it up by hand - first_rel = self._charm.model.relations[self._relation_name][0] - self.on.loki_push_api_alert_rules_changed.emit( - relation=first_rel, - relation_id=first_rel.id, - ) - - def _on_logging_relation_joined(self, event: RelationJoinedEvent): - """Set basic data on relation joins. - - Set the promtail binary URL location, which will not change, and anything - else which may be required, but is static.. - - Args: - event: a `CharmEvent` in response to which the consumer - charm must set its relation data. - """ - if self._charm.unit.is_leader(): - event.relation.data[self._charm.app].update(self._promtail_binary_url) - logger.debug("Saved promtail binary url: %s", self._promtail_binary_url) - - def _on_logging_relation_changed(self, event: HookEvent): - """Handle changes in related consumers. - - Anytime there are changes in the relation between Loki - and its consumers charms. - - Args: - event: a `CharmEvent` in response to which the consumer - charm must update its relation data. - """ - should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore - if should_update: - self.on.loki_push_api_alert_rules_changed.emit( - relation=event.relation, # pyright: ignore - relation_id=event.relation.id, # pyright: ignore - app=self._charm.app, - unit=self._charm.unit, - ) - - def _on_logging_relation_broken(self, event: RelationBrokenEvent): - """Removes alert rules files when consumer charms left the relation with Loki. - - Args: - event: a `CharmEvent` in response to which the Loki - charm must update its relation data. - """ - self.on.loki_push_api_alert_rules_changed.emit( - relation=event.relation, - relation_id=event.relation.id, - app=self._charm.app, - unit=self._charm.unit, - ) - - def _on_logging_relation_departed(self, event: RelationDepartedEvent): - """Removes alert rules files when consumer charms left the relation with Loki. - - Args: - event: a `CharmEvent` in response to which the Loki - charm must update its relation data. - """ - self.on.loki_push_api_alert_rules_changed.emit( - relation=event.relation, - relation_id=event.relation.id, - app=self._charm.app, - unit=self._charm.unit, - ) - - def _should_update_alert_rules(self, relation) -> bool: - """Determine whether alert rules should be regenerated. - - If there are alert rules in the relation data bag, tell the charm - whether to regenerate them based on the boolean returned here. - """ - if relation.data.get(relation.app).get("alert_rules", None) is not None: - return True - return False - - def _process_logging_relation_changed(self, relation: Relation) -> bool: - """Handle changes in related consumers. - - Anytime there are changes in relations between Loki - and its consumers charms, Loki set the `loki_push_api` - into the relation data. Set the endpoint building - appropriately, and if there are alert rules present in - the relation, let the caller know. - Besides Loki generates alert rules files based what - consumer charms forwards, - - Args: - relation: the `Relation` instance to update. - - Returns: - A boolean indicating whether an event should be emitted, so we - only emit one on lifecycle events - """ - relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" - self.update_endpoint(relation=relation) - return self._should_update_alert_rules(relation) - - @property - def _promtail_binary_url(self) -> dict: - """URL from which Promtail binary can be downloaded.""" - # construct promtail binary url paths from parts - promtail_binaries = {} - for arch, info in PROMTAIL_BINARIES.items(): - info["url"] = "{}/promtail-{}/{}.gz".format( - PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] - ) - promtail_binaries[arch] = info - - return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} - - def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: - """Triggers programmatically the update of endpoint in unit relation data. - - This method should be used when the charm relying on this library needs - to update the relation data in response to something occurring outside - the `logging` relation lifecycle, e.g., in case of a - host address change because the charmed operator becomes connected to an - Ingress after the `logging` relation is established. - - Args: - url: An optional url value to update relation data. - relation: An optional instance of `class:ops.model.Relation` to update. - """ - # if no relation is specified update all of them - if not relation: - if not self._charm.model.relations.get(self._relation_name): - return - - relations_list = self._charm.model.relations.get(self._relation_name) - else: - relations_list = [relation] - - endpoint = self._endpoint(url or self._url) - - for relation in relations_list: - relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) - - logger.debug("Saved endpoint in unit relation data") - - @property - def _url(self) -> str: - """Get local Loki Push API url. - - Return url to loki, including port number, but without the endpoint subpath. - """ - return "http://{}:{}".format(socket.getfqdn(), self.port) - - def _endpoint(self, url) -> dict: - """Get Loki push API endpoint for a given url. - - Args: - url: A loki unit URL. - - Returns: str - """ - endpoint = "/loki/api/v1/push" - return {"url": url.rstrip("/") + endpoint} - - @property - def alerts(self) -> dict: # noqa: C901 - """Fetch alerts for all relations. - - A Loki alert rules file consists of a list of "groups". Each - group consists of a list of alerts (`rules`) that are sequentially - executed. This method returns all the alert rules provided by each - related metrics provider charm. These rules may be used to generate a - separate alert rules file for each relation since the returned list - of alert groups are indexed by relation ID. Also for each relation ID - associated scrape metadata such as Juju model, UUID and application - name are provided so a unique name may be generated for the rules - file. For each relation the structure of data returned is a dictionary - with four keys - - - groups - - model - - model_uuid - - application - - The value of the `groups` key is such that it may be used to generate - a Loki alert rules file directly using `yaml.dump` but the - `groups` key itself must be included as this is required by Loki, - for example as in `yaml.dump({"groups": alerts["groups"]})`. - - Currently only accepts a list of rules and these - rules are all placed into a single group, even though Loki itself - allows for multiple groups within a single alert rules file. - - Returns: - a dictionary of alert rule groups and associated scrape - metadata indexed by relation ID. - """ - alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files - for relation in self._charm.model.relations[self._relation_name]: - if not relation.units or not relation.app: - continue - - alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) - if not alert_rules: - continue - - alert_rules = self._inject_alert_expr_labels(alert_rules) - - identifier, topology = self._get_identifier_by_alert_rules(alert_rules) - if not topology: - try: - metadata = json.loads(relation.data[relation.app]["metadata"]) - identifier = JujuTopology.from_dict(metadata).identifier - alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore - - except KeyError as e: - logger.debug( - "Relation %s has no 'metadata': %s", - relation.id, - e, - ) - - if not identifier: - logger.error( - "Alert rules were found but no usable group or identifier was present." - ) - continue - - _, errmsg = self._tool.validate_alert_rules(alert_rules) - if errmsg: - relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) - continue - - alerts[identifier] = alert_rules - - return alerts - - def _get_identifier_by_alert_rules( - self, rules: dict - ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: - """Determine an appropriate dict key for alert rules. - - The key is used as the filename when writing alerts to disk, so the structure - and uniqueness is important. - - Args: - rules: a dict of alert rules - Returns: - A tuple containing an identifier, if found, and a JujuTopology, if it could - be constructed. - """ - if "groups" not in rules: - logger.debug("No alert groups were found in relation data") - return None, None - - # Construct an ID based on what's in the alert rules if they have labels - for group in rules["groups"]: - try: - labels = group["rules"][0]["labels"] - topology = JujuTopology( - # Don't try to safely get required constructor fields. There's already - # a handler for KeyErrors - model_uuid=labels["juju_model_uuid"], - model=labels["juju_model"], - application=labels["juju_application"], - unit=labels.get("juju_unit", ""), - charm_name=labels.get("juju_charm", ""), - ) - return topology.identifier, topology - except KeyError: - logger.debug("Alert rules were found but no usable labels were present") - continue - - logger.warning( - "No labeled alert rules were found, and no 'scrape_metadata' " - "was available. Using the alert group name as filename." - ) - try: - for group in rules["groups"]: - return group["name"], None - except KeyError: - logger.debug("No group name was found to use as identifier") - - return None, None - - def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: - """Iterate through alert rules and inject topology into expressions. - - Args: - rules: a dict of alert rules - """ - if "groups" not in rules: - return rules - - modified_groups = [] - for group in rules["groups"]: - # Copy off rules, so we don't modify an object we're iterating over - rules_copy = group["rules"] - for idx, rule in enumerate(rules_copy): - labels = rule.get("labels") - - if labels: - try: - topology = JujuTopology( - # Don't try to safely get required constructor fields. There's already - # a handler for KeyErrors - model_uuid=labels["juju_model_uuid"], - model=labels["juju_model"], - application=labels["juju_application"], - unit=labels.get("juju_unit", ""), - charm_name=labels.get("juju_charm", ""), - ) - - # Inject topology and put it back in the list - rule["expr"] = self._tool.inject_label_matchers( - re.sub(r"%%juju_topology%%,?", "", rule["expr"]), - topology.label_matcher_dict, - ) - except KeyError: - # Some required JujuTopology key is missing. Just move on. - pass - - group["rules"][idx] = rule - - modified_groups.append(group) - - rules["groups"] = modified_groups - return rules - - -class ConsumerBase(Object): - """Consumer's base class.""" - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, - recursive: bool = False, - skip_alert_topology_labeling: bool = False, - ): - super().__init__(charm, relation_name) - self._charm = charm - self._relation_name = relation_name - self.topology = JujuTopology.from_charm(charm) - - try: - alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) - except InvalidAlertRulePathError as e: - logger.debug( - "Invalid Loki alert rules folder at %s: %s", - e.alert_rules_absolute_path, - e.message, - ) - self._alert_rules_path = alert_rules_path - self._skip_alert_topology_labeling = skip_alert_topology_labeling - - self._recursive = recursive - - def _handle_alert_rules(self, relation): - if not self._charm.unit.is_leader(): - return - - alert_rules = ( - AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology) - ) - alert_rules.add_path(self._alert_rules_path, recursive=self._recursive) - alert_rules_as_dict = alert_rules.as_dict() - - relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict()) - relation.data[self._charm.app]["alert_rules"] = json.dumps( - alert_rules_as_dict, - sort_keys=True, # sort, to prevent unnecessary relation_changed events - ) - - @property - def loki_endpoints(self) -> List[dict]: - """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data. - - Returns: - A list of dictionaries with Loki Push API endpoints, for instance: - [ - {"url": "http://loki1:3100/loki/api/v1/push"}, - {"url": "http://loki2:3100/loki/api/v1/push"}, - ] - """ - endpoints = [] # type: list - - for relation in self._charm.model.relations[self._relation_name]: - for unit in relation.units: - if unit.app == self._charm.app: - # This is a peer unit - continue - - endpoint = relation.data[unit].get("endpoint") - if endpoint: - deserialized_endpoint = json.loads(endpoint) - endpoints.append(deserialized_endpoint) - - return endpoints - - -class LokiPushApiConsumer(ConsumerBase): - """Loki Consumer class.""" - - on = LokiPushApiEvents() # pyright: ignore - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, - recursive: bool = True, - skip_alert_topology_labeling: bool = False, - ): - """Construct a Loki charm client. - - The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as - the Loki API endpoint to push logs. It is intended for workloads that can speak - loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such - as grafana-agent. - (If you only need to forward a few workload log files, then use LogProxyConsumer.) - - `LokiPushApiConsumer` can be instantiated as follows: - - self._loki_consumer = LokiPushApiConsumer(self) - - Args: - charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. - Typically, this is `self` in the instantiating class. - relation_name: the string name of the relation interface to look up. - If `charm` has exactly one relation with this interface, the relation's - name is returned. If none or multiple relations with the provided interface - are found, this method will raise either a NoRelationWithInterfaceFoundError or - MultipleRelationsWithInterfaceFoundError exception, respectively. - alert_rules_path: a string indicating a path where alert rules can be found - recursive: Whether to scan for rule files recursively. - skip_alert_topology_labeling: whether to skip the alert topology labeling. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the `loki_push_api` relation - interface. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the `RelationRole.provides` - role. - - Emits: - loki_push_api_endpoint_joined: This event is emitted when the relation between the - Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance) - and the Charmed Operator that instantiates `LokiPushApiConsumer` is established. - loki_push_api_endpoint_departed: This event is emitted when the relation between the - Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance) - and the Charmed Operator that implements `LokiPushApiConsumer` is removed. - loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules - file is encountered or if `alert_rules_path` is empty. - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires - ) - super().__init__( - charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling - ) - events = self._charm.on[relation_name] - self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) - self.framework.observe(events.relation_joined, self._on_logging_relation_joined) - self.framework.observe(events.relation_changed, self._on_logging_relation_changed) - self.framework.observe(events.relation_departed, self._on_logging_relation_departed) - - def _on_lifecycle_event(self, _: HookEvent): - """Update require relation data on charm upgrades and other lifecycle events. - - Args: - event: a `CharmEvent` in response to which the consumer - charm must update its relation data. - """ - # Upgrade event or other charm-level event - self._reinitialize_alert_rules() - self.on.loki_push_api_endpoint_joined.emit() - - def _on_logging_relation_joined(self, event: RelationJoinedEvent): - """Handle changes in related consumers. - - Update relation data and emit events when a relation is established. - - Args: - event: a `CharmEvent` in response to which the consumer - charm must update its relation data. - - Emits: - loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. - loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules - file is encountered or if `alert_rules_path` is empty. - """ - # Alert rules will not change over the lifecycle of a charm, and do not need to be - # constantly set on every relation_changed event. Leave them here. - self._handle_alert_rules(event.relation) - self.on.loki_push_api_endpoint_joined.emit() - - def _on_logging_relation_changed(self, event: RelationEvent): - """Handle changes in related consumers. - - Anytime there are changes in the relation between Loki - and its consumers charms. - - Args: - event: a `CharmEvent` in response to which the consumer - charm must update its relation data. - - Emits: - loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. - loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules - file is encountered or if `alert_rules_path` is empty. - """ - if self._charm.unit.is_leader(): - ev = json.loads(event.relation.data[event.app].get("event", "{}")) - - if ev: - valid = bool(ev.get("valid", True)) - errors = ev.get("errors", "") - - if valid and not errors: - self.on.alert_rule_status_changed.emit(valid=valid) - else: - self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) - - self.on.loki_push_api_endpoint_joined.emit() - - def _reinitialize_alert_rules(self): - """Reloads alert rules and updates all relations.""" - for relation in self._charm.model.relations[self._relation_name]: - self._handle_alert_rules(relation) - - def _process_logging_relation_changed(self, relation: Relation): - self._handle_alert_rules(relation) - self.on.loki_push_api_endpoint_joined.emit() - - def _on_logging_relation_departed(self, _: RelationEvent): - """Handle departures in related providers. - - Anytime there are departures in relations between the consumer charm and Loki - the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event. - The consumer charm can then choose to update its configuration. - """ - # Provide default to avoid throwing, as in some complicated scenarios with - # upgrades and hook failures we might not have data in the storage - self.on.loki_push_api_endpoint_departed.emit() - - -class ContainerNotFoundError(Exception): - """Raised if the specified container does not exist.""" - - def __init__(self): - msg = "The specified container does not exist." - self.message = msg - - super().__init__(self.message) - - -class MultipleContainersFoundError(Exception): - """Raised if no container name is passed but multiple containers are present.""" - - def __init__(self): - msg = ( - "No 'container_name' parameter has been specified; since this Charmed Operator" - " is has multiple containers, container_name must be specified for the container" - " to get logs from." - ) - self.message = msg - - super().__init__(self.message) - - -class PromtailDigestError(EventBase): - """Event emitted when there is an error with Promtail initialization.""" - - def __init__(self, handle, message): - super().__init__(handle) - self.message = message - - def snapshot(self): - """Save message information.""" - return {"message": self.message} - - def restore(self, snapshot): - """Restore message information.""" - self.message = snapshot["message"] - - -class LogProxyEndpointDeparted(EventBase): - """Event emitted when a Log Proxy has departed.""" - - -class LogProxyEndpointJoined(EventBase): - """Event emitted when a Log Proxy joins.""" - - -class LogProxyEvents(ObjectEvents): - """Event descriptor for events raised by `LogProxyConsumer`.""" - - promtail_digest_error = EventSource(PromtailDigestError) - log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted) - log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) - - -class LogProxyConsumer(ConsumerBase): - """LogProxyConsumer class. - - The `LogProxyConsumer` object provides a method for attaching `promtail` to - a workload in order to generate structured logging data from applications - which traditionally log to syslog or do not have native Loki integration. - The `LogProxyConsumer` can be instantiated as follows: - - self._log_proxy_consumer = LogProxyConsumer(self, log_files=["/var/log/messages"]) - - Args: - charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. - Typically, this is `self` in the instantiating class. - log_files: a list of log files to monitor with Promtail. - relation_name: the string name of the relation interface to look up. - If `charm` has exactly one relation with this interface, the relation's - name is returned. If none or multiple relations with the provided interface - are found, this method will raise either a NoRelationWithInterfaceFoundError or - MultipleRelationsWithInterfaceFoundError exception, respectively. - enable_syslog: Whether to enable syslog integration. - syslog_port: The port syslog is attached to. - alert_rules_path: an optional path for the location of alert rules - files. Defaults to "./src/loki_alert_rules", - resolved from the directory hosting the charm entry file. - The alert rules are automatically updated on charm upgrade. - recursive: Whether to scan for rule files recursively. - container_name: An optional container name to inject the payload into. - promtail_resource_name: An optional promtail resource name from metadata - if it has been modified and attached - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the `loki_push_api` relation - interface. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the `RelationRole.provides` - role. - """ - - on = LogProxyEvents() # pyright: ignore - - def __init__( - self, - charm, - log_files: Optional[Union[List[str], str]] = None, - relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, - enable_syslog: bool = False, - syslog_port: int = 1514, - alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, - recursive: bool = False, - container_name: str = "", - promtail_resource_name: Optional[str] = None, - *, # TODO: In v1, move the star up so everything after 'charm' is a kwarg - insecure_skip_verify: bool = False, - ): - super().__init__(charm, relation_name, alert_rules_path, recursive) - self._charm = charm - self._relation_name = relation_name - self._container = self._get_container(container_name) - self._container_name = self._get_container_name(container_name) - - if not log_files: - log_files = [] - elif isinstance(log_files, str): - log_files = [log_files] - elif not isinstance(log_files, list) or not all((isinstance(x, str) for x in log_files)): - raise TypeError("The 'log_files' argument must be a list of strings.") - self._log_files = log_files - - self._syslog_port = syslog_port - self._is_syslog = enable_syslog - self.topology = JujuTopology.from_charm(charm) - self._promtail_resource_name = promtail_resource_name or "promtail-bin" - self.insecure_skip_verify = insecure_skip_verify - - # architecture used for promtail binary - arch = platform.processor() - self._arch = "amd64" if arch == "x86_64" else arch - - events = self._charm.on[relation_name] - self.framework.observe(events.relation_created, self._on_relation_created) - self.framework.observe(events.relation_changed, self._on_relation_changed) - self.framework.observe(events.relation_departed, self._on_relation_departed) - # turn the container name to a valid Python identifier - snake_case_container_name = self._container_name.replace("-", "_") - self.framework.observe( - getattr(self._charm.on, "{}_pebble_ready".format(snake_case_container_name)), - self._on_pebble_ready, - ) - - def _on_pebble_ready(self, _: WorkloadEvent): - """Event handler for `pebble_ready`.""" - if self.model.relations[self._relation_name]: - self._setup_promtail() - - def _on_relation_created(self, _: RelationCreatedEvent) -> None: - """Event handler for `relation_created`.""" - if not self._container.can_connect(): - return - self._setup_promtail() - - def _on_relation_changed(self, event: RelationEvent) -> None: - """Event handler for `relation_changed`. - - Args: - event: The event object `RelationChangedEvent`. - """ - self._handle_alert_rules(event.relation) - - if self._charm.unit.is_leader(): - ev = json.loads(event.relation.data[event.app].get("event", "{}")) - - if ev: - valid = bool(ev.get("valid", True)) - errors = ev.get("errors", "") - - if valid and not errors: - self.on.alert_rule_status_changed.emit(valid=valid) - else: - self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) - - if not self._container.can_connect(): - return - if self.model.relations[self._relation_name]: - if "promtail" not in self._container.get_plan().services: - self._setup_promtail() - return - - new_config = self._promtail_config - if new_config != self._current_config: - self._container.push( - WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True - ) - - # Loki may send endpoints late. Don't necessarily start, there may be - # no clients - if new_config["clients"]: - self._container.restart(WORKLOAD_SERVICE_NAME) - self.on.log_proxy_endpoint_joined.emit() - else: - self.on.promtail_digest_error.emit("No promtail client endpoints available!") - - def _on_relation_departed(self, _: RelationEvent) -> None: - """Event handler for `relation_departed`. - - Args: - event: The event object `RelationDepartedEvent`. - """ - if not self._container.can_connect(): - return - if not self._charm.model.relations[self._relation_name]: - self._container.stop(WORKLOAD_SERVICE_NAME) - return - - new_config = self._promtail_config - if new_config != self._current_config: - self._container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) - - if new_config["clients"]: - self._container.restart(WORKLOAD_SERVICE_NAME) - else: - self._container.stop(WORKLOAD_SERVICE_NAME) - self.on.log_proxy_endpoint_departed.emit() - - def _get_container(self, container_name: str = "") -> Container: # pyright: ignore - """Gets a single container by name or using the only container running in the Pod. - - If there is more than one container in the Pod a `PromtailDigestError` is emitted. - - Args: - container_name: The container name. - - Returns: - A `ops.model.Container` object representing the container. - - Emits: - PromtailDigestError, if there was a problem obtaining a container. - """ - try: - container_name = self._get_container_name(container_name) - return self._charm.unit.get_container(container_name) - except (MultipleContainersFoundError, ContainerNotFoundError, ModelError) as e: - msg = str(e) - logger.warning(msg) - self.on.promtail_digest_error.emit(msg) - - def _get_container_name(self, container_name: str = "") -> str: - """Helper function for getting/validating a container name. - - Args: - container_name: The container name to be validated (optional). - - Returns: - container_name: The same container_name that was passed (if it exists) or the only - container name that is present (if no container_name was passed). - - Raises: - ContainerNotFoundError, if container_name does not exist. - MultipleContainersFoundError, if container_name was not provided but multiple - containers are present. - """ - containers = dict(self._charm.model.unit.containers) - if len(containers) == 0: - raise ContainerNotFoundError - - if not container_name: - # container_name was not provided - will get it ourselves, if it is the only one - if len(containers) > 1: - raise MultipleContainersFoundError - - # Get the first key in the containers' dict. - # Need to "cast", otherwise: - # error: Incompatible return value type (got "Optional[str]", expected "str") - container_name = cast(str, next(iter(containers.keys()))) - - elif container_name not in containers: - raise ContainerNotFoundError - - return container_name - - def _add_pebble_layer(self, workload_binary_path: str) -> None: - """Adds Pebble layer that manages Promtail service in Workload container. - - Args: - workload_binary_path: string providing path to promtail binary in workload container. - """ - pebble_layer = { - "summary": "promtail layer", - "description": "pebble config layer for promtail", - "services": { - WORKLOAD_SERVICE_NAME: { - "override": "replace", - "summary": WORKLOAD_SERVICE_NAME, - "command": "{} {}".format(workload_binary_path, self._cli_args), - "startup": "disabled", - } - }, - } - self._container.add_layer( - self._container_name, pebble_layer, combine=True # pyright: ignore - ) - - def _create_directories(self) -> None: - """Creates the directories for Promtail binary and config file.""" - self._container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) - self._container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) - - def _obtain_promtail(self, promtail_info: dict) -> None: - """Obtain promtail binary from an attached resource or download it. - - Args: - promtail_info: dictionary containing information about promtail binary - that must be used. The dictionary must have three keys - - "filename": filename of promtail binary - - "zipsha": sha256 sum of zip file of promtail binary - - "binsha": sha256 sum of unpacked promtail binary - """ - workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) - if self._promtail_attached_as_resource: - self._push_promtail_if_attached(workload_binary_path) - return - - if self._promtail_must_be_downloaded(promtail_info): - self._download_and_push_promtail_to_workload(promtail_info) - else: - binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) - self._push_binary_to_workload(binary_path, workload_binary_path) - - def _push_binary_to_workload(self, binary_path: str, workload_binary_path: str) -> None: - """Push promtail binary into workload container. - - Args: - binary_path: path in charm container from which promtail binary is read. - workload_binary_path: path in workload container to which promtail binary is pushed. - """ - with open(binary_path, "rb") as f: - self._container.push( - workload_binary_path, - f, - permissions=0o755, - encoding=None, # pyright: ignore - make_dirs=True, - ) - logger.debug("The promtail binary file has been pushed to the workload container.") - - @property - def _promtail_attached_as_resource(self) -> bool: - """Checks whether Promtail binary is attached to the charm or not. - - Returns: - a boolean representing whether Promtail binary is attached as a resource or not. - """ - try: - self._charm.model.resources.fetch(self._promtail_resource_name) - return True - except ModelError: - return False - except NameError as e: - if "invalid resource name" in str(e): - return False - raise - - def _push_promtail_if_attached(self, workload_binary_path: str) -> bool: - """Checks whether Promtail binary is attached to the charm or not. - - Args: - workload_binary_path: string specifying expected path of promtail - in workload container - - Returns: - a boolean representing whether Promtail binary is attached or not. - """ - logger.info("Promtail binary file has been obtained from an attached resource.") - resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) - self._push_binary_to_workload(resource_path, workload_binary_path) - return True - - def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: - """Checks whether promtail binary must be downloaded or not. - - Args: - promtail_info: dictionary containing information about promtail binary - that must be used. The dictionary must have three keys - - "filename": filename of promtail binary - - "zipsha": sha256 sum of zip file of promtail binary - - "binsha": sha256 sum of unpacked promtail binary - - Returns: - a boolean representing whether Promtail binary must be downloaded or not. - """ - binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) - if not self._is_promtail_binary_in_charm(binary_path): - return True - - if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): - return True - - logger.debug("Promtail binary file is already in the the charm container.") - return False - - def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: - """Checks whether a file's sha256sum matches or not with a specific sha256sum. - - Args: - file_path: A string representing the files' patch. - sha256sum: The sha256sum against which we want to verify. - - Returns: - a boolean representing whether a file's sha256sum matches or not with - a specific sha256sum. - """ - try: - with open(file_path, "rb") as f: - file_bytes = f.read() - result = sha256(file_bytes).hexdigest() - - if result != sha256sum: - msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( - sha256sum, result - ) - logger.debug(msg) - return False - - return True - except (APIError, FileNotFoundError): - msg = "File: '{}' could not be opened".format(file_path) - logger.error(msg) - return False - - def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: - """Check if Promtail binary is already stored in charm container. - - Args: - binary_path: string path of promtail binary to check - - Returns: - a boolean representing whether Promtail is present or not. - """ - return True if Path(binary_path).is_file() else False - - def _download_and_push_promtail_to_workload(self, promtail_info: dict) -> None: - """Downloads a Promtail zip file and pushes the binary to the workload. - - Args: - promtail_info: dictionary containing information about promtail binary - that must be used. The dictionary must have three keys - - "filename": filename of promtail binary - - "zipsha": sha256 sum of zip file of promtail binary - - "binsha": sha256 sum of unpacked promtail binary - """ - # Check for Juju proxy variables and fall back to standard ones if not set - # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get - # the proxy env variables from the environment - proxies = { - # The ProxyHandler uses only the protocol names as keys - # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler - "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), - "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), - # The ProxyHandler uses `no` for the no_proxy key - # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553 - "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""), - } - proxies = {k: v for k, v in proxies.items() if v != ""} or None - - proxy_handler = request.ProxyHandler(proxies) - opener = request.build_opener(proxy_handler) - - with opener.open(promtail_info["url"]) as r: - file_bytes = r.read() - file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") - with open(file_path, "wb") as f: - f.write(file_bytes) - logger.info( - "Promtail binary zip file has been downloaded and stored in: %s", - file_path, - ) - - decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) - binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) - with open(binary_path, "wb") as outfile: - outfile.write(decompressed_file.read()) - logger.debug("Promtail binary file has been downloaded.") - - workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) - self._push_binary_to_workload(binary_path, workload_binary_path) - - @property - def _cli_args(self) -> str: - """Return the cli arguments to pass to promtail. - - Returns: - The arguments as a string - """ - return "-config.file={}".format(WORKLOAD_CONFIG_PATH) - - @property - def _current_config(self) -> dict: - """Property that returns the current Promtail configuration. - - Returns: - A dict containing Promtail configuration. - """ - if not self._container.can_connect(): - logger.debug("Could not connect to promtail container!") - return {} - try: - raw_current = self._container.pull(WORKLOAD_CONFIG_PATH).read() - return yaml.safe_load(raw_current) - except (ProtocolError, PathError) as e: - logger.warning( - "Could not check the current promtail configuration due to " - "a failure in retrieving the file: %s", - e, - ) - return {} - - @property - def _promtail_config(self) -> dict: - """Generates the config file for Promtail. - - Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration - """ - config = {"clients": self._clients_list()} - if self.insecure_skip_verify: - for client in config["clients"]: - client["tls_config"] = {"insecure_skip_verify": True} - - config.update(self._server_config()) - config.update(self._positions()) - config.update(self._scrape_configs()) - return config - - def _clients_list(self) -> list: - """Generates a list of clients for use in the promtail config. - - Returns: - A list of endpoints - """ - return self.loki_endpoints - - def _server_config(self) -> dict: - """Generates the server section of the Promtail config file. - - Returns: - A dict representing the `server` section. - """ - return { - "server": { - "http_listen_port": HTTP_LISTEN_PORT, - "grpc_listen_port": GRPC_LISTEN_PORT, - } - } - - def _positions(self) -> dict: - """Generates the positions section of the Promtail config file. - - Returns: - A dict representing the `positions` section. - """ - return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} - - def _scrape_configs(self) -> dict: - """Generates the scrape_configs section of the Promtail config file. - - Returns: - A dict representing the `scrape_configs` section. - """ - job_name = "juju_{}".format(self.topology.identifier) - - # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it - common_labels = { - "juju_{}".format(k): v - for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() - } - scrape_configs = [] - - # Files config - labels = common_labels.copy() - labels.update( - { - "job": job_name, - "__path__": "", - } - ) - config = {"targets": ["localhost"], "labels": labels} - scrape_config = { - "job_name": "system", - "static_configs": self._generate_static_configs(config), - } - scrape_configs.append(scrape_config) - - # Syslog config - if self._is_syslog: - relabel_mappings = [ - "severity", - "facility", - "hostname", - "app_name", - "proc_id", - "msg_id", - ] - syslog_labels = common_labels.copy() - syslog_labels.update({"job": "{}_syslog".format(job_name)}) - syslog_config = { - "job_name": "syslog", - "syslog": { - "listen_address": "127.0.0.1:{}".format(self._syslog_port), - "label_structured_data": True, - "labels": syslog_labels, - }, - "relabel_configs": [ - {"source_labels": ["__syslog_message_{}".format(val)], "target_label": val} - for val in relabel_mappings - ] - + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], - } - scrape_configs.append(syslog_config) # type: ignore - - return {"scrape_configs": scrape_configs} - - def _generate_static_configs(self, config: dict) -> list: - """Generates static_configs section. - - Returns: - - a list of dictionaries representing static_configs section - """ - static_configs = [] - - for _file in self._log_files: - conf = deepcopy(config) - conf["labels"]["__path__"] = _file - static_configs.append(conf) - - return static_configs - - def _setup_promtail(self) -> None: - # Use the first - relations = self._charm.model.relations[self._relation_name] - if len(relations) > 1: - logger.debug( - "Multiple log_proxy relations. Getting Promtail from application {}".format( - relations[0].app.name - ) - ) - relation = relations[0] - promtail_binaries = json.loads( - relation.data[relation.app].get("promtail_binary_zip_url", "{}") - ) - if not promtail_binaries: - return - - if not self._is_promtail_installed(promtail_binaries[self._arch]): - try: - self._obtain_promtail(promtail_binaries[self._arch]) - except HTTPError as e: - msg = "Promtail binary couldn't be downloaded - {}".format(str(e)) - logger.warning(msg) - self.on.promtail_digest_error.emit(msg) - return - - workload_binary_path = os.path.join( - WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] - ) - - self._create_directories() - self._container.push( - WORKLOAD_CONFIG_PATH, yaml.safe_dump(self._promtail_config), make_dirs=True - ) - - self._add_pebble_layer(workload_binary_path) - - if self._current_config.get("clients"): - try: - self._container.restart(WORKLOAD_SERVICE_NAME) - except ChangeError as e: - self.on.promtail_digest_error.emit(str(e)) - else: - self.on.log_proxy_endpoint_joined.emit() - else: - self.on.promtail_digest_error.emit("No promtail client endpoints available!") - - def _is_promtail_installed(self, promtail_info: dict) -> bool: - """Determine if promtail has already been installed to the container. - - Args: - promtail_info: dictionary containing information about promtail binary - that must be used. The dictionary must at least contain a key - "filename" giving the name of promtail binary - """ - workload_binary_path = "{}/{}".format(WORKLOAD_BINARY_DIR, promtail_info["filename"]) - try: - self._container.list_files(workload_binary_path) - except (APIError, FileNotFoundError): - return False - return True - - @property - def syslog_port(self) -> str: - """Gets the port on which promtail is listening for syslog. - - Returns: - A str representing the port - """ - return str(self._syslog_port) - - @property - def rsyslog_config(self) -> str: - """Generates a config line for use with rsyslog. - - Returns: - The rsyslog config line as a string - """ - return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( - self._syslog_port - ) - - -class CosTool: - """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" - - _path = None - _disabled = False - - def __init__(self, charm): - self._charm = charm - - @property - def path(self): - """Lazy lookup of the path of cos-tool.""" - if self._disabled: - return None - if not self._path: - self._path = self._get_tool_path() - if not self._path: - logger.debug("Skipping injection of juju topology as label matchers") - self._disabled = True - return self._path - - def apply_label_matchers(self, rules) -> dict: - """Will apply label matchers to the expression of all alerts in all supplied groups.""" - if not self.path: - return rules - for group in rules["groups"]: - rules_in_group = group.get("rules", []) - for rule in rules_in_group: - topology = {} - # if the user for some reason has provided juju_unit, we'll need to honor it - # in most cases, however, this will be empty - for label in [ - "juju_model", - "juju_model_uuid", - "juju_application", - "juju_charm", - "juju_unit", - ]: - if label in rule["labels"]: - topology[label] = rule["labels"][label] - - rule["expr"] = self.inject_label_matchers(rule["expr"], topology) - return rules - - def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: - """Will validate correctness of alert rules, returning a boolean and any errors.""" - if not self.path: - logger.debug("`cos-tool` unavailable. Not validating alert correctness.") - return True, "" - - with tempfile.TemporaryDirectory() as tmpdir: - rule_path = Path(tmpdir + "/validate_rule.yaml") - - # Smash "our" rules format into what upstream actually uses, which is more like: - # - # groups: - # - name: foo - # rules: - # - alert: SomeAlert - # expr: up - # - alert: OtherAlert - # expr: up - transformed_rules = {"groups": []} # type: ignore - for rule in rules["groups"]: - transformed_rules["groups"].append(rule) - - rule_path.write_text(yaml.dump(transformed_rules)) - args = [str(self.path), "--format", "logql", "validate", str(rule_path)] - # noinspection PyBroadException - try: - self._exec(args) - return True, "" - except subprocess.CalledProcessError as e: - logger.debug("Validating the rules failed: %s", e.output) - return False, ", ".join([line for line in e.output if "error validating" in line]) - - def inject_label_matchers(self, expression, topology) -> str: - """Add label matchers to an expression.""" - if not topology: - return expression - if not self.path: - logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) - return expression - args = [str(self.path), "--format", "logql", "transform"] - args.extend( - ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] - ) - - args.extend(["{}".format(expression)]) - # noinspection PyBroadException - try: - return self._exec(args) - except subprocess.CalledProcessError as e: - logger.debug('Applying the expression failed: "%s", falling back to the original', e) - print('Applying the expression failed: "{}", falling back to the original'.format(e)) - return expression - - def _get_tool_path(self) -> Optional[Path]: - arch = platform.processor() - arch = "amd64" if arch == "x86_64" else arch - res = "cos-tool-{}".format(arch) - try: - path = Path(res).resolve() - path.chmod(0o777) - return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: - logger.debug('Could not locate cos-tool at: "{}"'.format(res)) - return None - - def _exec(self, cmd) -> str: - result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) - output = result.stdout.decode("utf-8").strip() - return output diff --git a/charm/lib/charms/observability_libs/v0/juju_topology.py b/charm/lib/charms/observability_libs/v0/juju_topology.py deleted file mode 100644 index a79e5d4..0000000 --- a/charm/lib/charms/observability_libs/v0/juju_topology.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -"""## Overview. - -This document explains how to use the `JujuTopology` class to -create and consume topology information from Juju in a consistent manner. - -The goal of the Juju topology is to uniquely identify a piece -of software running across any of your Juju-managed deployments. -This is achieved by combining the following four elements: - -- Model name -- Model UUID -- Application name -- Unit identifier - - -For a more in-depth description of the concept, as well as a -walk-through of it's use-case in observability, see -[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) -on the Juju blog. - -## Library Usage - -This library may be used to create and consume `JujuTopology` objects. -The `JujuTopology` class provides three ways to create instances: - -### Using the `from_charm` method - -Enables instantiation by supplying the charm as an argument. When -creating topology objects for the current charm, this is the recommended -approach. - -```python -topology = JujuTopology.from_charm(self) -``` - -### Using the `from_dict` method - -Allows for instantion using a dictionary of relation data, like the -`scrape_metadata` from Prometheus or the labels of an alert rule. When -creating topology objects for remote charms, this is the recommended -approach. - -```python -scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) -topology = JujuTopology.from_dict(scrape_metadata) -``` - -### Using the class constructor - -Enables instantiation using whatever values you want. While this -is useful in some very specific cases, this is almost certainly not -what you are looking for as setting these values manually may -result in observability metrics which do not uniquely identify a -charm in order to provide accurate usage reporting, alerting, -horizontal scaling, or other use cases. - -```python -topology = JujuTopology( - model="some-juju-model", - model_uuid="00000000-0000-0000-0000-000000000001", - application="fancy-juju-application", - unit="fancy-juju-application/0", - charm_name="fancy-juju-application-k8s", -) -``` - -""" -from collections import OrderedDict -from typing import Dict, List, Optional -from uuid import UUID - -# The unique Charmhub library identifier, never change it -LIBID = "bced1658f20f49d28b88f61f83c2d232" - -LIBAPI = 0 -LIBPATCH = 6 - - -class InvalidUUIDError(Exception): - """Invalid UUID was provided.""" - - def __init__(self, uuid: str): - self.message = "'{}' is not a valid UUID.".format(uuid) - super().__init__(self.message) - - -class JujuTopology: - """JujuTopology is used for storing, generating and formatting juju topology information. - - DEPRECATED: This class is deprecated. Use `pip install cosl` and - `from cosl.juju_topology import JujuTopology` instead. - """ - - def __init__( - self, - model: str, - model_uuid: str, - application: str, - unit: Optional[str] = None, - charm_name: Optional[str] = None, - ): - """Build a JujuTopology object. - - A `JujuTopology` object is used for storing and transforming - Juju topology information. This information is used to - annotate Prometheus scrape jobs and alert rules. Such - annotation when applied to scrape jobs helps in identifying - the source of the scrapped metrics. On the other hand when - applied to alert rules topology information ensures that - evaluation of alert expressions is restricted to the source - (charm) from which the alert rules were obtained. - - Args: - model: a string name of the Juju model - model_uuid: a globally unique string identifier for the Juju model - application: an application name as a string - unit: a unit name as a string - charm_name: name of charm as a string - """ - if not self.is_valid_uuid(model_uuid): - raise InvalidUUIDError(model_uuid) - - self._model = model - self._model_uuid = model_uuid - self._application = application - self._charm_name = charm_name - self._unit = unit - - def is_valid_uuid(self, uuid): - """Validate the supplied UUID against the Juju Model UUID pattern. - - Args: - uuid: string that needs to be checked if it is valid v4 UUID. - - Returns: - True if parameter is a valid v4 UUID, False otherwise. - """ - try: - return str(UUID(uuid, version=4)) == uuid - except (ValueError, TypeError): - return False - - @classmethod - def from_charm(cls, charm): - """Creates a JujuTopology instance by using the model data available on a charm object. - - Args: - charm: a `CharmBase` object for which the `JujuTopology` will be constructed - Returns: - a `JujuTopology` object. - """ - return cls( - model=charm.model.name, - model_uuid=charm.model.uuid, - application=charm.model.app.name, - unit=charm.model.unit.name, - charm_name=charm.meta.name, - ) - - @classmethod - def from_dict(cls, data: dict): - """Factory method for creating `JujuTopology` children from a dictionary. - - Args: - data: a dictionary with five keys providing topology information. The keys are - - "model" - - "model_uuid" - - "application" - - "unit" - - "charm_name" - `unit` and `charm_name` may be empty, but will result in more limited - labels. However, this allows us to support charms without workloads. - - Returns: - a `JujuTopology` object. - """ - return cls( - model=data["model"], - model_uuid=data["model_uuid"], - application=data["application"], - unit=data.get("unit", ""), - charm_name=data.get("charm_name", ""), - ) - - def as_dict( - self, - *, - remapped_keys: Optional[Dict[str, str]] = None, - excluded_keys: Optional[List[str]] = None, - ) -> OrderedDict: - """Format the topology information into an ordered dict. - - Keeping the dictionary ordered is important to be able to - compare dicts without having to resort to deep comparisons. - - Args: - remapped_keys: A dictionary mapping old key names to new key names, - which will be substituted when invoked. - excluded_keys: A list of key names to exclude from the returned dict. - uuid_length: The length to crop the UUID to. - """ - ret = OrderedDict( - [ - ("model", self.model), - ("model_uuid", self.model_uuid), - ("application", self.application), - ("unit", self.unit), - ("charm_name", self.charm_name), - ] - ) - if excluded_keys: - ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) - - if remapped_keys: - ret = OrderedDict( - (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore - ) - - return ret - - @property - def identifier(self) -> str: - """Format the topology information into a terse string. - - This crops the model UUID, making it unsuitable for comparisons against - anything but other identifiers. Mainly to be used as a display name or file - name where long strings might become an issue. - - >>> JujuTopology( \ - model = "a-model", \ - model_uuid = "00000000-0000-4000-8000-000000000000", \ - application = "some-app", \ - unit = "some-app/1" \ - ).identifier - 'a-model_00000000_some-app' - """ - parts = self.as_dict( - excluded_keys=["unit", "charm_name"], - ) - - parts["model_uuid"] = self.model_uuid_short - values = parts.values() - - return "_".join([str(val) for val in values]).replace("/", "_") - - @property - def label_matcher_dict(self) -> Dict[str, str]: - """Format the topology information into a dict with keys having 'juju_' as prefix. - - Relabelled topology never includes the unit as it would then only match - the leader unit (ie. the unit that produced the dict). - """ - items = self.as_dict( - remapped_keys={"charm_name": "charm"}, - excluded_keys=["unit"], - ).items() - - return {"juju_{}".format(key): value for key, value in items if value} - - @property - def label_matchers(self) -> str: - """Format the topology information into a promql/logql label matcher string. - - Topology label matchers should never include the unit as it - would then only match the leader unit (ie. the unit that - produced the matchers). - """ - items = self.label_matcher_dict.items() - return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) - - @property - def model(self) -> str: - """Getter for the juju model value.""" - return self._model - - @property - def model_uuid(self) -> str: - """Getter for the juju model uuid value.""" - return self._model_uuid - - @property - def model_uuid_short(self) -> str: - """Getter for the juju model value, truncated to the first eight letters.""" - return self._model_uuid[:8] - - @property - def application(self) -> str: - """Getter for the juju application value.""" - return self._application - - @property - def charm_name(self) -> Optional[str]: - """Getter for the juju charm name value.""" - return self._charm_name - - @property - def unit(self) -> Optional[str]: - """Getter for the juju unit value.""" - return self._unit diff --git a/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py deleted file mode 100644 index 665af88..0000000 --- a/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ /dev/null @@ -1,2379 +0,0 @@ -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. -"""Prometheus Scrape Library. - -## Overview - -This document explains how to integrate with the Prometheus charm -for the purpose of providing a metrics endpoint to Prometheus. It -also explains how alternative implementations of the Prometheus charms -may maintain the same interface and be backward compatible with all -currently integrated charms. Finally this document is the -authoritative reference on the structure of relation data that is -shared between Prometheus charms and any other charm that intends to -provide a scrape target for Prometheus. - -## Source code - -Source code can be found on GitHub at: - https://github.com/canonical/prometheus-k8s-operator/tree/main/lib/charms/prometheus_k8s - -## Provider Library Usage - -This Prometheus charm interacts with its scrape targets using its -charm library. Charms seeking to expose metric endpoints for the -Prometheus charm, must do so using the `MetricsEndpointProvider` -object from this charm library. For the simplest use cases, using the -`MetricsEndpointProvider` object only requires instantiating it, -typically in the constructor of your charm (the one which exposes a -metrics endpoint). The `MetricsEndpointProvider` constructor requires -the name of the relation over which a scrape target (metrics endpoint) -is exposed to the Prometheus charm. This relation must use the -`prometheus_scrape` interface. By default address of the metrics -endpoint is set to the unit IP address, by each unit of the -`MetricsEndpointProvider` charm. These units set their address in -response to the `PebbleReady` event of each container in the unit, -since container restarts of Kubernetes charms can result in change of -IP addresses. The default name for the metrics endpoint relation is -`metrics-endpoint`. It is strongly recommended to use the same -relation name for consistency across charms and doing so obviates the -need for an additional constructor argument. The -`MetricsEndpointProvider` object may be instantiated as follows - - from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider - - def __init__(self, *args): - super().__init__(*args) - ... - self.metrics_endpoint = MetricsEndpointProvider(self) - ... - -Note that the first argument (`self`) to `MetricsEndpointProvider` is -always a reference to the parent (scrape target) charm. - -An instantiated `MetricsEndpointProvider` object will ensure that each -unit of its parent charm, is a scrape target for the -`MetricsEndpointConsumer` (Prometheus) charm. By default -`MetricsEndpointProvider` assumes each unit of the consumer charm -exports its metrics at a path given by `/metrics` on port 80. These -defaults may be changed by providing the `MetricsEndpointProvider` -constructor an optional argument (`jobs`) that represents a -Prometheus scrape job specification using Python standard data -structures. This job specification is a subset of Prometheus' own -[scrape -configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) -format but represented using Python data structures. More than one job -may be provided using the `jobs` argument. Hence `jobs` accepts a list -of dictionaries where each dictionary represents one `` -object as described in the Prometheus documentation. The currently -supported configuration subset is: `job_name`, `metrics_path`, -`static_configs` - -Suppose it is required to change the port on which scraped metrics are -exposed to 8000. This may be done by providing the following data -structure as the value of `jobs`. - -``` -[ - { - "static_configs": [ - { - "targets": ["*:8000"] - } - ] - } -] -``` - -The wildcard ("*") host specification implies that the scrape targets -will automatically be set to the host addresses advertised by each -unit of the consumer charm. - -It is also possible to change the metrics path and scrape multiple -ports, for example - -``` -[ - { - "metrics_path": "/my-metrics-path", - "static_configs": [ - { - "targets": ["*:8000", "*:8081"], - } - ] - } -] -``` - -More complex scrape configurations are possible. For example - -``` -[ - { - "static_configs": [ - { - "targets": ["10.1.32.215:7000", "*:8000"], - "labels": { - "some_key": "some-value" - } - } - ] - } -] -``` - -This example scrapes the target "10.1.32.215" at port 7000 in addition -to scraping each unit at port 8000. There is however one difference -between wildcard targets (specified using "*") and fully qualified -targets (such as "10.1.32.215"). The Prometheus charm automatically -associates labels with metrics generated by each target. These labels -localise the source of metrics within the Juju topology by specifying -its "model name", "model UUID", "application name" and "unit -name". However unit name is associated only with wildcard targets but -not with fully qualified targets. - -Multiple jobs with different metrics paths and labels are allowed, but -each job must be given a unique name: - -``` -[ - { - "job_name": "my-first-job", - "metrics_path": "one-path", - "static_configs": [ - { - "targets": ["*:7000"], - "labels": { - "some_key": "some-value" - } - } - ] - }, - { - "job_name": "my-second-job", - "metrics_path": "another-path", - "static_configs": [ - { - "targets": ["*:8000"], - "labels": { - "some_other_key": "some-other-value" - } - } - ] - } -] -``` - -**Important:** `job_name` should be a fixed string (e.g. hardcoded literal). -For instance, if you include variable elements, like your `unit.name`, it may break -the continuity of the metrics time series gathered by Prometheus when the leader unit -changes (e.g. on upgrade or rescale). - -Additionally, it is also technically possible, but **strongly discouraged**, to -configure the following scrape-related settings, which behave as described by the -[Prometheus documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config): - -- `static_configs` -- `scrape_interval` -- `scrape_timeout` -- `proxy_url` -- `relabel_configs` -- `metrics_relabel_configs` -- `sample_limit` -- `label_limit` -- `label_name_length_limit` -- `label_value_length_limit` - -The settings above are supported by the `prometheus_scrape` library only for the sake of -specialized facilities like the [Prometheus Scrape Config](https://charmhub.io/prometheus-scrape-config-k8s) -charm. Virtually no charms should use these settings, and charmers definitely **should not** -expose them to the Juju administrator via configuration options. - -## Consumer Library Usage - -The `MetricsEndpointConsumer` object may be used by Prometheus -charms to manage relations with their scrape targets. For this -purposes a Prometheus charm needs to do two things - -1. Instantiate the `MetricsEndpointConsumer` object by providing it a -reference to the parent (Prometheus) charm and optionally the name of -the relation that the Prometheus charm uses to interact with scrape -targets. This relation must confirm to the `prometheus_scrape` -interface and it is strongly recommended that this relation be named -`metrics-endpoint` which is its default value. - -For example a Prometheus charm may instantiate the -`MetricsEndpointConsumer` in its constructor as follows - - from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointConsumer - - def __init__(self, *args): - super().__init__(*args) - ... - self.metrics_consumer = MetricsEndpointConsumer(self) - ... - -2. A Prometheus charm also needs to respond to the -`TargetsChangedEvent` event of the `MetricsEndpointConsumer` by adding itself as -an observer for these events, as in - - self.framework.observe( - self.metrics_consumer.on.targets_changed, - self._on_scrape_targets_changed, - ) - -In responding to the `TargetsChangedEvent` event the Prometheus -charm must update the Prometheus configuration so that any new scrape -targets are added and/or old ones removed from the list of scraped -endpoints. For this purpose the `MetricsEndpointConsumer` object -exposes a `jobs()` method that returns a list of scrape jobs. Each -element of this list is the Prometheus scrape configuration for that -job. In order to update the Prometheus configuration, the Prometheus -charm needs to replace the current list of jobs with the list provided -by `jobs()` as follows - - def _on_scrape_targets_changed(self, event): - ... - scrape_jobs = self.metrics_consumer.jobs() - for job in scrape_jobs: - prometheus_scrape_config.append(job) - ... - -## Alerting Rules - -This charm library also supports gathering alerting rules from all -related `MetricsEndpointProvider` charms and enabling corresponding alerts within the -Prometheus charm. Alert rules are automatically gathered by `MetricsEndpointProvider` -charms when using this library, from a directory conventionally named -`prometheus_alert_rules`. This directory must reside at the top level -in the `src` folder of the consumer charm. Each file in this directory -is assumed to be in one of two formats: -- the official prometheus alert rule format, conforming to the -[Prometheus docs](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) -- a single rule format, which is a simplified subset of the official format, -comprising a single alert rule per file, using the same YAML fields. - -The file name must have one of the following extensions: -- `.rule` -- `.rules` -- `.yml` -- `.yaml` - -An example of the contents of such a file in the custom single rule -format is shown below. - -``` -alert: HighRequestLatency -expr: job:request_latency_seconds:mean5m{my_key=my_value} > 0.5 -for: 10m -labels: - severity: Medium - type: HighLatency -annotations: - summary: High request latency for {{ $labels.instance }}. -``` - -The `MetricsEndpointProvider` will read all available alert rules and -also inject "filtering labels" into the alert expressions. The -filtering labels ensure that alert rules are localised to the metrics -provider charm's Juju topology (application, model and its UUID). Such -a topology filter is essential to ensure that alert rules submitted by -one provider charm generates alerts only for that same charm. When -alert rules are embedded in a charm, and the charm is deployed as a -Juju application, the alert rules from that application have their -expressions automatically updated to filter for metrics coming from -the units of that application alone. This remove risk of spurious -evaluation, e.g., when you have multiple deployments of the same charm -monitored by the same Prometheus. - -Not all alerts one may want to specify can be embedded in a -charm. Some alert rules will be specific to a user's use case. This is -the case, for example, of alert rules that are based on business -constraints, like expecting a certain amount of requests to a specific -API every five minutes. Such alert rules can be specified via the -[COS Config Charm](https://charmhub.io/cos-configuration-k8s), -which allows importing alert rules and other settings like dashboards -from a Git repository. - -Gathering alert rules and generating rule files within the Prometheus -charm is easily done using the `alerts()` method of -`MetricsEndpointConsumer`. Alerts generated by Prometheus will -automatically include Juju topology labels in the alerts. These labels -indicate the source of the alert. The following labels are -automatically included with each alert - -- `juju_model` -- `juju_model_uuid` -- `juju_application` - -## Relation Data - -The Prometheus charm uses both application and unit relation data to -obtain information regarding its scrape jobs, alert rules and scrape -targets. This relation data is in JSON format and it closely resembles -the YAML structure of Prometheus [scrape configuration] -(https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). - -Units of Metrics provider charms advertise their names and addresses -over unit relation data using the `prometheus_scrape_unit_name` and -`prometheus_scrape_unit_address` keys. While the `scrape_metadata`, -`scrape_jobs` and `alert_rules` keys in application relation data -of Metrics provider charms hold eponymous information. - -""" # noqa: W505 - -import copy -import hashlib -import ipaddress -import json -import logging -import os -import platform -import re -import socket -import subprocess -import tempfile -from collections import defaultdict -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from urllib.parse import urlparse - -import yaml -from cosl import JujuTopology -from cosl.rules import AlertRules -from ops.charm import CharmBase, RelationRole -from ops.framework import ( - BoundEvent, - EventBase, - EventSource, - Object, - ObjectEvents, - StoredDict, - StoredList, - StoredState, -) -from ops.model import Relation - -# The unique Charmhub library identifier, never change it -LIBID = "bc84295fef5f4049878f07b131968ee2" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 44 - -PYDEPS = ["cosl"] - -logger = logging.getLogger(__name__) - - -ALLOWED_KEYS = { - "job_name", - "metrics_path", - "static_configs", - "scrape_interval", - "scrape_timeout", - "proxy_url", - "relabel_configs", - "metrics_relabel_configs", - "sample_limit", - "label_limit", - "label_name_length_limit", - "label_value_length_limit", - "scheme", - "basic_auth", - "tls_config", - "authorization", - "params", -} -DEFAULT_JOB = { - "metrics_path": "/metrics", - "static_configs": [{"targets": ["*:80"]}], -} - - -DEFAULT_RELATION_NAME = "metrics-endpoint" -RELATION_INTERFACE_NAME = "prometheus_scrape" - -DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/prometheus_alert_rules" - - -class PrometheusConfig: - """A namespace for utility functions for manipulating the prometheus config dict.""" - - # relabel instance labels so that instance identifiers are globally unique - # stable over unit recreation - topology_relabel_config = { - "source_labels": ["juju_model", "juju_model_uuid", "juju_application"], - "separator": "_", - "target_label": "instance", - "regex": "(.*)", - } - - topology_relabel_config_wildcard = { - "source_labels": ["juju_model", "juju_model_uuid", "juju_application", "juju_unit"], - "separator": "_", - "target_label": "instance", - "regex": "(.*)", - } - - @staticmethod - def sanitize_scrape_config(job: dict) -> dict: - """Restrict permissible scrape configuration options. - - If job is empty then a default job is returned. The - default job is - - ``` - { - "metrics_path": "/metrics", - "static_configs": [{"targets": ["*:80"]}], - } - ``` - - Args: - job: a dict containing a single Prometheus job - specification. - - Returns: - a dictionary containing a sanitized job specification. - """ - sanitized_job = DEFAULT_JOB.copy() - sanitized_job.update({key: value for key, value in job.items() if key in ALLOWED_KEYS}) - return sanitized_job - - @staticmethod - def sanitize_scrape_configs(scrape_configs: List[dict]) -> List[dict]: - """A vectorized version of `sanitize_scrape_config`.""" - return [PrometheusConfig.sanitize_scrape_config(job) for job in scrape_configs] - - @staticmethod - def prefix_job_names(scrape_configs: List[dict], prefix: str) -> List[dict]: - """Adds the given prefix to all the job names in the given scrape_configs list.""" - modified_scrape_configs = [] - for scrape_config in scrape_configs: - job_name = scrape_config.get("job_name") - modified = scrape_config.copy() - modified["job_name"] = prefix + "_" + job_name if job_name else prefix - modified_scrape_configs.append(modified) - - return modified_scrape_configs - - @staticmethod - def expand_wildcard_targets_into_individual_jobs( - scrape_jobs: List[dict], - hosts: Dict[str, Tuple[str, str]], - topology: Optional[JujuTopology] = None, - ) -> List[dict]: - """Extract wildcard hosts from the given scrape_configs list into separate jobs. - - Args: - scrape_jobs: list of scrape jobs. - hosts: a dictionary mapping host names to host address for - all units of the relation for which this job configuration - must be constructed. - topology: optional arg for adding topology labels to scrape targets. - """ - # hosts = self._relation_hosts(relation) - - modified_scrape_jobs = [] - for job in scrape_jobs: - static_configs = job.get("static_configs") - if not static_configs: - continue - - # When a single unit specified more than one wildcard target, then they are expanded - # into a static_config per target - non_wildcard_static_configs = [] - - for static_config in static_configs: - targets = static_config.get("targets") - if not targets: - continue - - # All non-wildcard targets remain in the same static_config - non_wildcard_targets = [] - - # All wildcard targets are extracted to a job per unit. If multiple wildcard - # targets are specified, they remain in the same static_config (per unit). - wildcard_targets = [] - - for target in targets: - match = re.compile(r"\*(?:(:\d+))?").match(target) - if match: - # This is a wildcard target. - # Need to expand into separate jobs and remove it from this job here - wildcard_targets.append(target) - else: - # This is not a wildcard target. Copy it over into its own static_config. - non_wildcard_targets.append(target) - - # All non-wildcard targets remain in the same static_config - if non_wildcard_targets: - non_wildcard_static_config = static_config.copy() - non_wildcard_static_config["targets"] = non_wildcard_targets - - if topology: - # When non-wildcard targets (aka fully qualified hostnames) are specified, - # there is no reliable way to determine the name (Juju topology unit name) - # for such a target. Therefore labeling with Juju topology, excluding the - # unit name. - non_wildcard_static_config["labels"] = { - **non_wildcard_static_config.get("labels", {}), - **topology.label_matcher_dict, - } - - non_wildcard_static_configs.append(non_wildcard_static_config) - - # Extract wildcard targets into individual jobs - if wildcard_targets: - for unit_name, (unit_hostname, unit_path) in hosts.items(): - modified_job = job.copy() - modified_job["static_configs"] = [static_config.copy()] - modified_static_config = modified_job["static_configs"][0] - modified_static_config["targets"] = [ - target.replace("*", unit_hostname) for target in wildcard_targets - ] - - unit_num = unit_name.split("/")[-1] - job_name = modified_job.get("job_name", "unnamed-job") + "-" + unit_num - modified_job["job_name"] = job_name - modified_job["metrics_path"] = unit_path + ( - job.get("metrics_path") or "/metrics" - ) - - if topology: - # Add topology labels - modified_static_config["labels"] = { - **modified_static_config.get("labels", {}), - **topology.label_matcher_dict, - **{"juju_unit": unit_name}, - } - - # Instance relabeling for topology should be last in order. - modified_job["relabel_configs"] = modified_job.get( - "relabel_configs", [] - ) + [PrometheusConfig.topology_relabel_config_wildcard] - - modified_scrape_jobs.append(modified_job) - - if non_wildcard_static_configs: - modified_job = job.copy() - modified_job["static_configs"] = non_wildcard_static_configs - modified_job["metrics_path"] = modified_job.get("metrics_path") or "/metrics" - - if topology: - # Instance relabeling for topology should be last in order. - modified_job["relabel_configs"] = modified_job.get("relabel_configs", []) + [ - PrometheusConfig.topology_relabel_config - ] - - modified_scrape_jobs.append(modified_job) - - return modified_scrape_jobs - - @staticmethod - def render_alertmanager_static_configs(alertmanagers: List[str]): - """Render the alertmanager static_configs section from a list of URLs. - - Each target must be in the hostname:port format, and prefixes are specified in a separate - key. Therefore, with ingress in place, would need to extract the path into the - `path_prefix` key, which is higher up in the config hierarchy. - - https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config - - Args: - alertmanagers: List of alertmanager URLs. - - Returns: - A dict representation for the static_configs section. - """ - # Make sure it's a valid url so urlparse could parse it. - scheme = re.compile(r"^https?://") - sanitized = [am if scheme.search(am) else "http://" + am for am in alertmanagers] - - # Create a mapping from paths to netlocs - # Group alertmanager targets into a dictionary of lists: - # {path: [netloc1, netloc2]} - paths = defaultdict(list) # type: Dict[Tuple[str, str], List[str]] - for parsed in map(urlparse, sanitized): - path = parsed.path or "/" - paths[(parsed.scheme, path)].append(parsed.netloc) - - return { - "alertmanagers": [ - { - # For https we still do not render a `tls_config` section because - # certs are expected to be made available by the charm via the - # `update-ca-certificates` mechanism. - "scheme": scheme, - "path_prefix": path_prefix, - "static_configs": [{"targets": netlocs}], - } - for (scheme, path_prefix), netlocs in paths.items() - ] - } - - -class RelationNotFoundError(Exception): - """Raised if there is no relation with the given name is found.""" - - def __init__(self, relation_name: str): - self.relation_name = relation_name - self.message = "No relation named '{}' found".format(relation_name) - - super().__init__(self.message) - - -class RelationInterfaceMismatchError(Exception): - """Raised if the relation with the given name has a different interface.""" - - def __init__( - self, - relation_name: str, - expected_relation_interface: str, - actual_relation_interface: str, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_interface - self.actual_relation_interface = actual_relation_interface - self.message = ( - "The '{}' relation has '{}' as interface rather than the expected '{}'".format( - relation_name, actual_relation_interface, expected_relation_interface - ) - ) - - super().__init__(self.message) - - -class RelationRoleMismatchError(Exception): - """Raised if the relation with the given name has a different role.""" - - def __init__( - self, - relation_name: str, - expected_relation_role: RelationRole, - actual_relation_role: RelationRole, - ): - self.relation_name = relation_name - self.expected_relation_interface = expected_relation_role - self.actual_relation_role = actual_relation_role - self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( - relation_name, repr(actual_relation_role), repr(expected_relation_role) - ) - - super().__init__(self.message) - - -class InvalidAlertRuleEvent(EventBase): - """Event emitted when alert rule files are not parsable. - - Enables us to set a clear status on the provider. - """ - - def __init__(self, handle, errors: str = "", valid: bool = False): - super().__init__(handle) - self.errors = errors - self.valid = valid - - def snapshot(self) -> Dict: - """Save alert rule information.""" - return { - "valid": self.valid, - "errors": self.errors, - } - - def restore(self, snapshot): - """Restore alert rule information.""" - self.valid = snapshot["valid"] - self.errors = snapshot["errors"] - - -class InvalidScrapeJobEvent(EventBase): - """Event emitted when alert rule files are not valid.""" - - def __init__(self, handle, errors: str = ""): - super().__init__(handle) - self.errors = errors - - def snapshot(self) -> Dict: - """Save error information.""" - return {"errors": self.errors} - - def restore(self, snapshot): - """Restore error information.""" - self.errors = snapshot["errors"] - - -class MetricsEndpointProviderEvents(ObjectEvents): - """Events raised by :class:`InvalidAlertRuleEvent`s.""" - - alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) - invalid_scrape_job = EventSource(InvalidScrapeJobEvent) - - -def _type_convert_stored(obj): - """Convert Stored* to their appropriate types, recursively.""" - if isinstance(obj, StoredList): - return list(map(_type_convert_stored, obj)) - if isinstance(obj, StoredDict): - rdict = {} # type: Dict[Any, Any] - for k in obj.keys(): - rdict[k] = _type_convert_stored(obj[k]) - return rdict - return obj - - -def _validate_relation_by_interface_and_direction( - charm: CharmBase, - relation_name: str, - expected_relation_interface: str, - expected_relation_role: RelationRole, -): - """Verifies that a relation has the necessary characteristics. - - Verifies that the `relation_name` provided: (1) exists in metadata.yaml, - (2) declares as interface the interface name passed as `relation_interface` - and (3) has the right "direction", i.e., it is a relation that `charm` - provides or requires. - - Args: - charm: a `CharmBase` object to scan for the matching relation. - relation_name: the name of the relation to be verified. - expected_relation_interface: the interface name to be matched by the - relation named `relation_name`. - expected_relation_role: whether the `relation_name` must be either - provided or required by `charm`. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the same relation interface - as specified via the `expected_relation_interface` argument. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the same role as specified - via the `expected_relation_role` argument. - """ - if relation_name not in charm.meta.relations: - raise RelationNotFoundError(relation_name) - - relation = charm.meta.relations[relation_name] - - actual_relation_interface = relation.interface_name - if actual_relation_interface != expected_relation_interface: - raise RelationInterfaceMismatchError( - relation_name, expected_relation_interface, actual_relation_interface or "None" - ) - - if expected_relation_role == RelationRole.provides: - if relation_name not in charm.meta.provides: - raise RelationRoleMismatchError( - relation_name, RelationRole.provides, RelationRole.requires - ) - elif expected_relation_role == RelationRole.requires: - if relation_name not in charm.meta.requires: - raise RelationRoleMismatchError( - relation_name, RelationRole.requires, RelationRole.provides - ) - else: - raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) - - -class InvalidAlertRulePathError(Exception): - """Raised if the alert rules folder cannot be found or is otherwise invalid.""" - - def __init__( - self, - alert_rules_absolute_path: Path, - message: str, - ): - self.alert_rules_absolute_path = alert_rules_absolute_path - self.message = message - - super().__init__(self.message) - - -def _is_official_alert_rule_format(rules_dict: dict) -> bool: - """Are alert rules in the upstream format as supported by Prometheus. - - Alert rules in dictionary format are in "official" form if they - contain a "groups" key, since this implies they contain a list of - alert rule groups. - - Args: - rules_dict: a set of alert rules in Python dictionary format - - Returns: - True if alert rules are in official Prometheus file format. - """ - return "groups" in rules_dict - - -def _is_single_alert_rule_format(rules_dict: dict) -> bool: - """Are alert rules in single rule format. - - The Prometheus charm library supports reading of alert rules in a - custom format that consists of a single alert rule per file. This - does not conform to the official Prometheus alert rule file format - which requires that each alert rules file consists of a list of - alert rule groups and each group consists of a list of alert - rules. - - Alert rules in dictionary form are considered to be in single rule - format if in the least it contains two keys corresponding to the - alert rule name and alert expression. - - Returns: - True if alert rule is in single rule file format. - """ - # one alert rule per file - return set(rules_dict) >= {"alert", "expr"} - - -class TargetsChangedEvent(EventBase): - """Event emitted when Prometheus scrape targets change.""" - - def __init__(self, handle, relation_id): - super().__init__(handle) - self.relation_id = relation_id - - def snapshot(self): - """Save scrape target relation information.""" - return {"relation_id": self.relation_id} - - def restore(self, snapshot): - """Restore scrape target relation information.""" - self.relation_id = snapshot["relation_id"] - - -class MonitoringEvents(ObjectEvents): - """Event descriptor for events raised by `MetricsEndpointConsumer`.""" - - targets_changed = EventSource(TargetsChangedEvent) - - -class MetricsEndpointConsumer(Object): - """A Prometheus based Monitoring service.""" - - on = MonitoringEvents() # pyright: ignore - - def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME): - """A Prometheus based Monitoring service. - - Args: - charm: a `CharmBase` instance that manages this - instance of the Prometheus service. - relation_name: an optional string name of the relation between `charm` - and the Prometheus charmed service. The default is "metrics-endpoint". - It is strongly advised not to change the default, so that people - deploying your charm will have a consistent experience with all - other charms that consume metrics endpoints. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the `prometheus_scrape` relation - interface. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the `RelationRole.requires` - role. - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires - ) - - super().__init__(charm, relation_name) - self._charm = charm - self._relation_name = relation_name - self._tool = CosTool(self._charm) - events = self._charm.on[relation_name] - self.framework.observe(events.relation_changed, self._on_metrics_provider_relation_changed) - self.framework.observe( - events.relation_departed, self._on_metrics_provider_relation_departed - ) - - def _on_metrics_provider_relation_changed(self, event): - """Handle changes with related metrics providers. - - Anytime there are changes in relations between Prometheus - and metrics provider charms the Prometheus charm is informed, - through a `TargetsChangedEvent` event. The Prometheus charm can - then choose to update its scrape configuration. - - Args: - event: a `CharmEvent` in response to which the Prometheus - charm must update its scrape configuration. - """ - rel_id = event.relation.id - - self.on.targets_changed.emit(relation_id=rel_id) - - def _on_metrics_provider_relation_departed(self, event): - """Update job config when a metrics provider departs. - - When a metrics provider departs the Prometheus charm is informed - through a `TargetsChangedEvent` event so that it can update its - scrape configuration to ensure that the departed metrics provider - is removed from the list of scrape jobs and - - Args: - event: a `CharmEvent` that indicates a metrics provider - unit has departed. - """ - rel_id = event.relation.id - self.on.targets_changed.emit(relation_id=rel_id) - - def jobs(self) -> list: - """Fetch the list of scrape jobs. - - Returns: - A list consisting of all the static scrape configurations - for each related `MetricsEndpointProvider` that has specified - its scrape targets. - """ - scrape_jobs = [] - - for relation in self._charm.model.relations[self._relation_name]: - static_scrape_jobs = self._static_scrape_config(relation) - if static_scrape_jobs: - # Duplicate job names will cause validate_scrape_jobs to fail. - # Therefore we need to dedupe here and after all jobs are collected. - static_scrape_jobs = _dedupe_job_names(static_scrape_jobs) - try: - self._tool.validate_scrape_jobs(static_scrape_jobs) - except subprocess.CalledProcessError as e: - if self._charm.unit.is_leader(): - data = json.loads(relation.data[self._charm.app].get("event", "{}")) - data["scrape_job_errors"] = str(e) - relation.data[self._charm.app]["event"] = json.dumps(data) - else: - scrape_jobs.extend(static_scrape_jobs) - - scrape_jobs = _dedupe_job_names(scrape_jobs) - - return scrape_jobs - - @property - def alerts(self) -> dict: - """Fetch alerts for all relations. - - A Prometheus alert rules file consists of a list of "groups". Each - group consists of a list of alerts (`rules`) that are sequentially - executed. This method returns all the alert rules provided by each - related metrics provider charm. These rules may be used to generate a - separate alert rules file for each relation since the returned list - of alert groups are indexed by that relations Juju topology identifier. - The Juju topology identifier string includes substrings that identify - alert rule related metadata such as the Juju model, model UUID and the - application name from where the alert rule originates. Since this - topology identifier is globally unique, it may be used for instance as - the name for the file into which the list of alert rule groups are - written. For each relation, the structure of data returned is a dictionary - representation of a standard prometheus rules file: - - {"groups": [{"name": ...}, ...]} - - per official prometheus documentation - https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ - - The value of the `groups` key is such that it may be used to generate - a Prometheus alert rules file directly using `yaml.dump` but the - `groups` key itself must be included as this is required by Prometheus. - - For example the list of alert rule groups returned by this method may - be written into files consumed by Prometheus as follows - - ``` - for topology_identifier, alert_rule_groups in self.metrics_consumer.alerts().items(): - filename = "juju_" + topology_identifier + ".rules" - path = os.path.join(PROMETHEUS_RULES_DIR, filename) - rules = yaml.safe_dump(alert_rule_groups) - container.push(path, rules, make_dirs=True) - ``` - - Returns: - A dictionary mapping the Juju topology identifier of the source charm to - its list of alert rule groups. - """ - alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files - for relation in self._charm.model.relations[self._relation_name]: - if not relation.units or not relation.app: - continue - - alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) - if not alert_rules: - continue - - alert_rules = self._inject_alert_expr_labels(alert_rules) - - identifier, topology = self._get_identifier_by_alert_rules(alert_rules) - if not topology: - try: - scrape_metadata = json.loads(relation.data[relation.app]["scrape_metadata"]) - identifier = JujuTopology.from_dict(scrape_metadata).identifier - - except KeyError as e: - logger.debug( - "Relation %s has no 'scrape_metadata': %s", - relation.id, - e, - ) - - if not identifier: - logger.error( - "Alert rules were found but no usable group or identifier was present." - ) - continue - - # We need to append the relation info to the identifier. This is to allow for cases for there are two - # relations which eventually scrape the same application. Issue #551. - identifier = f"{identifier}_{relation.name}_{relation.id}" - - alerts[identifier] = alert_rules - - _, errmsg = self._tool.validate_alert_rules(alert_rules) - if errmsg: - if alerts[identifier]: - del alerts[identifier] - if self._charm.unit.is_leader(): - data = json.loads(relation.data[self._charm.app].get("event", "{}")) - data["errors"] = errmsg - relation.data[self._charm.app]["event"] = json.dumps(data) - continue - - return alerts - - def _get_identifier_by_alert_rules( - self, rules: dict - ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: - """Determine an appropriate dict key for alert rules. - - The key is used as the filename when writing alerts to disk, so the structure - and uniqueness is important. - - Args: - rules: a dict of alert rules - Returns: - A tuple containing an identifier, if found, and a JujuTopology, if it could - be constructed. - """ - if "groups" not in rules: - logger.debug("No alert groups were found in relation data") - return None, None - - # Construct an ID based on what's in the alert rules if they have labels - for group in rules["groups"]: - try: - labels = group["rules"][0]["labels"] - topology = JujuTopology( - # Don't try to safely get required constructor fields. There's already - # a handler for KeyErrors - model_uuid=labels["juju_model_uuid"], - model=labels["juju_model"], - application=labels["juju_application"], - unit=labels.get("juju_unit", ""), - charm_name=labels.get("juju_charm", ""), - ) - return topology.identifier, topology - except KeyError: - logger.debug("Alert rules were found but no usable labels were present") - continue - - logger.warning( - "No labeled alert rules were found, and no 'scrape_metadata' " - "was available. Using the alert group name as filename." - ) - try: - for group in rules["groups"]: - return group["name"], None - except KeyError: - logger.debug("No group name was found to use as identifier") - - return None, None - - def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: - """Iterate through alert rules and inject topology into expressions. - - Args: - rules: a dict of alert rules - """ - if "groups" not in rules: - return rules - - modified_groups = [] - for group in rules["groups"]: - # Copy off rules, so we don't modify an object we're iterating over - rules_copy = group["rules"] - for idx, rule in enumerate(rules_copy): - labels = rule.get("labels") - - if labels: - try: - topology = JujuTopology( - # Don't try to safely get required constructor fields. There's already - # a handler for KeyErrors - model_uuid=labels["juju_model_uuid"], - model=labels["juju_model"], - application=labels["juju_application"], - unit=labels.get("juju_unit", ""), - charm_name=labels.get("juju_charm", ""), - ) - - # Inject topology and put it back in the list - rule["expr"] = self._tool.inject_label_matchers( - re.sub(r"%%juju_topology%%,?", "", rule["expr"]), - topology.alert_expression_dict, - ) - except KeyError: - # Some required JujuTopology key is missing. Just move on. - pass - - group["rules"][idx] = rule - - modified_groups.append(group) - - rules["groups"] = modified_groups - return rules - - def _static_scrape_config(self, relation) -> list: - """Generate the static scrape configuration for a single relation. - - If the relation data includes `scrape_metadata` then the value - of this key is used to annotate the scrape jobs with Juju - Topology labels before returning them. - - Args: - relation: an `ops.model.Relation` object whose static - scrape configuration is required. - - Returns: - A list (possibly empty) of scrape jobs. Each job is a - valid Prometheus scrape configuration for that job, - represented as a Python dictionary. - """ - if not relation.units: - return [] - - scrape_configs = json.loads(relation.data[relation.app].get("scrape_jobs", "[]")) - - if not scrape_configs: - return [] - - scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) - - if not scrape_metadata: - return scrape_configs - - topology = JujuTopology.from_dict(scrape_metadata) - - job_name_prefix = "juju_{}_prometheus_scrape".format(topology.identifier) - scrape_configs = PrometheusConfig.prefix_job_names(scrape_configs, job_name_prefix) - scrape_configs = PrometheusConfig.sanitize_scrape_configs(scrape_configs) - - hosts = self._relation_hosts(relation) - - scrape_configs = PrometheusConfig.expand_wildcard_targets_into_individual_jobs( - scrape_configs, hosts, topology - ) - - # For https scrape targets we still do not render a `tls_config` section because certs - # are expected to be made available by the charm via the `update-ca-certificates` mechanism. - return scrape_configs - - def _relation_hosts(self, relation: Relation) -> Dict[str, Tuple[str, str]]: - """Returns a mapping from unit names to (address, path) tuples, for the given relation.""" - hosts = {} - for unit in relation.units: - # TODO deprecate and remove unit.name - unit_name = relation.data[unit].get("prometheus_scrape_unit_name") or unit.name - # TODO deprecate and remove "prometheus_scrape_host" - unit_address = relation.data[unit].get( - "prometheus_scrape_unit_address" - ) or relation.data[unit].get("prometheus_scrape_host") - unit_path = relation.data[unit].get("prometheus_scrape_unit_path", "") - if unit_name and unit_address: - hosts.update({unit_name: (unit_address, unit_path)}) - - return hosts - - def _target_parts(self, target) -> list: - """Extract host and port from a wildcard target. - - Args: - target: a string specifying a scrape target. A - scrape target is expected to have the format - "host:port". The host part may be a wildcard - "*" and the port part can be missing (along - with ":") in which case port is set to 80. - - Returns: - a list with target host and port as in [host, port] - """ - if ":" in target: - parts = target.split(":") - else: - parts = [target, "80"] - - return parts - - -def _dedupe_job_names(jobs: List[dict]): - """Deduplicate a list of dicts by appending a hash to the value of the 'job_name' key. - - Additionally, fully de-duplicate any identical jobs. - - Args: - jobs: A list of prometheus scrape jobs - """ - jobs_copy = copy.deepcopy(jobs) - - # Convert to a dict with job names as keys - # I think this line is O(n^2) but it should be okay given the list sizes - jobs_dict = { - job["job_name"]: list(filter(lambda x: x["job_name"] == job["job_name"], jobs_copy)) - for job in jobs_copy - } - - # If multiple jobs have the same name, convert the name to "name_" - for key in jobs_dict: - if len(jobs_dict[key]) > 1: - for job in jobs_dict[key]: - job_json = json.dumps(job) - hashed = hashlib.sha256(job_json.encode()).hexdigest() - job["job_name"] = "{}_{}".format(job["job_name"], hashed) - new_jobs = [] - for key in jobs_dict: - new_jobs.extend(list(jobs_dict[key])) - - # Deduplicate jobs which are equal - # Again this in O(n^2) but it should be okay - deduped_jobs = [] - seen = [] - for job in new_jobs: - job_json = json.dumps(job) - hashed = hashlib.sha256(job_json.encode()).hexdigest() - if hashed in seen: - continue - seen.append(hashed) - deduped_jobs.append(job) - - return deduped_jobs - - -def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: - """Resolve the provided path items against the directory of the main file. - - Look up the directory of the `main.py` file being executed. This is normally - going to be the charm.py file of the charm including this library. Then, resolve - the provided path elements and, if the result path exists and is a directory, - return its absolute path; otherwise, raise en exception. - - Raises: - InvalidAlertRulePathError, if the path does not exist or is not a directory. - """ - charm_dir = Path(str(charm.charm_dir)) - if not charm_dir.exists() or not charm_dir.is_dir(): - # Operator Framework does not currently expose a robust - # way to determine the top level charm source directory - # that is consistent across deployed charms and unit tests - # Hence for unit tests the current working directory is used - # TODO: updated this logic when the following ticket is resolved - # https://github.com/canonical/operator/issues/643 - charm_dir = Path(os.getcwd()) - - alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) - - if not alerts_dir_path.exists(): - raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") - if not alerts_dir_path.is_dir(): - raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") - - return str(alerts_dir_path) - - -class MetricsEndpointProvider(Object): - """A metrics endpoint for Prometheus.""" - - on = MetricsEndpointProviderEvents() # pyright: ignore - - def __init__( - self, - charm, - relation_name: str = DEFAULT_RELATION_NAME, - jobs=None, - alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, - refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None, - external_url: str = "", - lookaside_jobs_callable: Optional[Callable] = None, - ): - """Construct a metrics provider for a Prometheus charm. - - If your charm exposes a Prometheus metrics endpoint, the - `MetricsEndpointProvider` object enables your charm to easily - communicate how to reach that metrics endpoint. - - By default, a charm instantiating this object has the metrics - endpoints of each of its units scraped by the related Prometheus - charms. The scraped metrics are automatically tagged by the - Prometheus charms with Juju topology data via the - `juju_model_name`, `juju_model_uuid`, `juju_application_name` - and `juju_unit` labels. To support such tagging `MetricsEndpointProvider` - automatically forwards scrape metadata to a `MetricsEndpointConsumer` - (Prometheus charm). - - Scrape targets provided by `MetricsEndpointProvider` can be - customized when instantiating this object. For example in the - case of a charm exposing the metrics endpoint for each of its - units on port 8080 and the `/metrics` path, the - `MetricsEndpointProvider` can be instantiated as follows: - - self.metrics_endpoint_provider = MetricsEndpointProvider( - self, - jobs=[{ - "static_configs": [{"targets": ["*:8080"]}], - }]) - - The notation `*:` means "scrape each unit of this charm on port - ``. - - In case the metrics endpoints are not on the standard `/metrics` path, - a custom path can be specified as follows: - - self.metrics_endpoint_provider = MetricsEndpointProvider( - self, - jobs=[{ - "metrics_path": "/my/strange/metrics/path", - "static_configs": [{"targets": ["*:8080"]}], - }]) - - Note how the `jobs` argument is a list: this allows you to expose multiple - combinations of paths "metrics_path" and "static_configs" in case your charm - exposes multiple endpoints, which could happen, for example, when you have - multiple workload containers, with applications in each needing to be scraped. - The structure of the objects in the `jobs` list is one-to-one with the - `scrape_config` configuration item of Prometheus' own configuration (see - https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config - ), but with only a subset of the fields allowed. The permitted fields are - listed in `ALLOWED_KEYS` object in this charm library module. - - It is also possible to specify alert rules. By default, this library will look - into the `/prometheus_alert_rules`, which in a standard charm - layouts resolves to `src/prometheus_alert_rules`. Each alert rule goes into a - separate `*.rule` file. If the syntax of a rule is invalid, - the `MetricsEndpointProvider` logs an error and does not load the particular - rule. - - To avoid false positives and negatives in the evaluation of alert rules, - all ingested alert rule expressions are automatically qualified using Juju - Topology filters. This ensures that alert rules provided by your charm, trigger - alerts based only on data scrapped from your charm. For example an alert rule - such as the following - - alert: UnitUnavailable - expr: up < 1 - for: 0m - - will be automatically transformed into something along the lines of the following - - alert: UnitUnavailable - expr: up{juju_model=, juju_model_uuid=, juju_application=} < 1 - for: 0m - - An attempt will be made to validate alert rules prior to loading them into Prometheus. - If they are invalid, an event will be emitted from this object which charms can respond - to in order to set a meaningful status for administrators. - - This can be observed via `consumer.on.alert_rule_status_changed` which contains: - - The error(s) encountered when validating as `errors` - - A `valid` attribute, which can be used to reset the state of charms if alert rules - are updated via another mechanism (e.g. `cos-config`) and refreshed. - - Args: - charm: a `CharmBase` object that manages this - `MetricsEndpointProvider` object. Typically, this is - `self` in the instantiating class. - relation_name: an optional string name of the relation between `charm` - and the Prometheus charmed service. The default is "metrics-endpoint". - It is strongly advised not to change the default, so that people - deploying your charm will have a consistent experience with all - other charms that provide metrics endpoints. - jobs: an optional list of dictionaries where each - dictionary represents the Prometheus scrape - configuration for a single job. When not provided, a - default scrape configuration is provided for the - `/metrics` endpoint polling all units of the charm on port `80` - using the `MetricsEndpointProvider` object. - alert_rules_path: an optional path for the location of alert rules - files. Defaults to "./prometheus_alert_rules", - resolved relative to the directory hosting the charm entry file. - The alert rules are automatically updated on charm upgrade. - refresh_event: an optional bound event or list of bound events which - will be observed to re-set scrape job data (IP address and others) - external_url: an optional argument that represents an external url that - can be generated by an Ingress or a Proxy. - lookaside_jobs_callable: an optional `Callable` which should be invoked - when the job configuration is built as a secondary mapping. The callable - should return a `List[Dict]` which is syntactically identical to the - `jobs` parameter, but can be updated out of step initialization of - this library without disrupting the 'global' job spec. - - Raises: - RelationNotFoundError: If there is no relation in the charm's metadata.yaml - with the same name as provided via `relation_name` argument. - RelationInterfaceMismatchError: The relation with the same name as provided - via `relation_name` argument does not have the `prometheus_scrape` relation - interface. - RelationRoleMismatchError: If the relation with the same name as provided - via `relation_name` argument does not have the `RelationRole.provides` - role. - """ - _validate_relation_by_interface_and_direction( - charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides - ) - - try: - alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) - except InvalidAlertRulePathError as e: - logger.debug( - "Invalid Prometheus alert rules folder at %s: %s", - e.alert_rules_absolute_path, - e.message, - ) - - super().__init__(charm, relation_name) - self.topology = JujuTopology.from_charm(charm) - - self._charm = charm - self._alert_rules_path = alert_rules_path - self._relation_name = relation_name - # sanitize job configurations to the supported subset of parameters - jobs = [] if jobs is None else jobs - self._jobs = PrometheusConfig.sanitize_scrape_configs(jobs) - - if external_url: - external_url = ( - external_url if urlparse(external_url).scheme else ("http://" + external_url) - ) - self.external_url = external_url - self._lookaside_jobs = lookaside_jobs_callable - - events = self._charm.on[self._relation_name] - self.framework.observe(events.relation_changed, self._on_relation_changed) - - if not refresh_event: - # FIXME remove once podspec charms are verified. - # `self.set_scrape_job_spec()` is called every re-init so this should not be needed. - if len(self._charm.meta.containers) == 1: - if "kubernetes" in self._charm.meta.series: - # This is a podspec charm - refresh_event = [self._charm.on.update_status] - else: - # This is a sidecar/pebble charm - container = list(self._charm.meta.containers.values())[0] - refresh_event = [self._charm.on[container.name.replace("-", "_")].pebble_ready] - else: - logger.warning( - "%d containers are present in metadata.yaml and " - "refresh_event was not specified. Defaulting to update_status. " - "Metrics IP may not be set in a timely fashion.", - len(self._charm.meta.containers), - ) - refresh_event = [self._charm.on.update_status] - - else: - if not isinstance(refresh_event, list): - refresh_event = [refresh_event] - - self.framework.observe(events.relation_joined, self.set_scrape_job_spec) - for ev in refresh_event: - self.framework.observe(ev, self.set_scrape_job_spec) - - def _on_relation_changed(self, event): - """Check for alert rule messages in the relation data before moving on.""" - if self._charm.unit.is_leader(): - ev = json.loads(event.relation.data[event.app].get("event", "{}")) - - if ev: - valid = bool(ev.get("valid", True)) - errors = ev.get("errors", "") - - if valid and not errors: - self.on.alert_rule_status_changed.emit(valid=valid) - else: - self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) - - scrape_errors = ev.get("scrape_job_errors", None) - if scrape_errors: - self.on.invalid_scrape_job.emit(errors=scrape_errors) - - def update_scrape_job_spec(self, jobs): - """Update scrape job specification.""" - self._jobs = PrometheusConfig.sanitize_scrape_configs(jobs) - self.set_scrape_job_spec() - - def set_scrape_job_spec(self, _=None): - """Ensure scrape target information is made available to prometheus. - - When a metrics provider charm is related to a prometheus charm, the - metrics provider sets specification and metadata related to its own - scrape configuration. This information is set using Juju application - data. In addition, each of the consumer units also sets its own - host address in Juju unit relation data. - """ - self._set_unit_ip() - - if not self._charm.unit.is_leader(): - return - - alert_rules = AlertRules(query_type="promql", topology=self.topology) - alert_rules.add_path(self._alert_rules_path, recursive=True) - alert_rules_as_dict = alert_rules.as_dict() - - for relation in self._charm.model.relations[self._relation_name]: - relation.data[self._charm.app]["scrape_metadata"] = json.dumps(self._scrape_metadata) - relation.data[self._charm.app]["scrape_jobs"] = json.dumps(self._scrape_jobs) - - if alert_rules_as_dict: - # Update relation data with the string representation of the rule file. - # Juju topology is already included in the "scrape_metadata" field above. - # The consumer side of the relation uses this information to name the rules file - # that is written to the filesystem. - relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict) - - def _set_unit_ip(self, _=None): - """Set unit host address. - - Each time a metrics provider charm container is restarted it updates its own - host address in the unit relation data for the prometheus charm. - - The only argument specified is an event, and it ignored. This is for expediency - to be able to use this method as an event handler, although no access to the - event is actually needed. - """ - for relation in self._charm.model.relations[self._relation_name]: - unit_ip = str(self._charm.model.get_binding(relation).network.bind_address) - - # TODO store entire url in relation data, instead of only select url parts. - - if self.external_url: - parsed = urlparse(self.external_url) - unit_address = parsed.hostname - path = parsed.path - elif self._is_valid_unit_address(unit_ip): - unit_address = unit_ip - path = "" - else: - unit_address = socket.getfqdn() - path = "" - - relation.data[self._charm.unit]["prometheus_scrape_unit_address"] = unit_address - relation.data[self._charm.unit]["prometheus_scrape_unit_path"] = path - relation.data[self._charm.unit]["prometheus_scrape_unit_name"] = str( - self._charm.model.unit.name - ) - - def _is_valid_unit_address(self, address: str) -> bool: - """Validate a unit address. - - At present only IP address validation is supported, but - this may be extended to DNS addresses also, as needed. - - Args: - address: a string representing a unit address - """ - try: - _ = ipaddress.ip_address(address) - except ValueError: - return False - - return True - - @property - def _scrape_jobs(self) -> list: - """Fetch list of scrape jobs. - - Returns: - A list of dictionaries, where each dictionary specifies a - single scrape job for Prometheus. - """ - jobs = self._jobs or [] - if callable(self._lookaside_jobs): - jobs.extend(PrometheusConfig.sanitize_scrape_configs(self._lookaside_jobs())) - return jobs or [DEFAULT_JOB] - - @property - def _scrape_metadata(self) -> dict: - """Generate scrape metadata. - - Returns: - Scrape configuration metadata for this metrics provider charm. - """ - return self.topology.as_dict() - - -class PrometheusRulesProvider(Object): - """Forward rules to Prometheus. - - This object may be used to forward rules to Prometheus. At present it only supports - forwarding alert rules. This is unlike :class:`MetricsEndpointProvider`, which - is used for forwarding both scrape targets and associated alert rules. This object - is typically used when there is a desire to forward rules that apply globally (across - all deployed charms and units) rather than to a single charm. All rule files are - forwarded using the same 'prometheus_scrape' interface that is also used by - `MetricsEndpointProvider`. - - Args: - charm: A charm instance that `provides` a relation with the `prometheus_scrape` interface. - relation_name: Name of the relation in `metadata.yaml` that - has the `prometheus_scrape` interface. - dir_path: Root directory for the collection of rule files. - recursive: Whether to scan for rule files recursively. - """ - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - dir_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, - recursive=True, - ): - super().__init__(charm, relation_name) - self._charm = charm - self._relation_name = relation_name - self._recursive = recursive - - try: - dir_path = _resolve_dir_against_charm_path(charm, dir_path) - except InvalidAlertRulePathError as e: - logger.debug( - "Invalid Prometheus alert rules folder at %s: %s", - e.alert_rules_absolute_path, - e.message, - ) - self.dir_path = dir_path - - events = self._charm.on[self._relation_name] - event_sources = [ - events.relation_joined, - events.relation_changed, - self._charm.on.leader_elected, - self._charm.on.upgrade_charm, - ] - - for event_source in event_sources: - self.framework.observe(event_source, self._update_relation_data) - - def _reinitialize_alert_rules(self): - """Reloads alert rules and updates all relations.""" - self._update_relation_data(None) - - def _update_relation_data(self, _): - """Update application relation data with alert rules for all relations.""" - if not self._charm.unit.is_leader(): - return - - alert_rules = AlertRules(query_type="promql") - alert_rules.add_path(self.dir_path, recursive=self._recursive) - alert_rules_as_dict = alert_rules.as_dict() - - logger.info("Updating relation data with rule files from disk") - for relation in self._charm.model.relations[self._relation_name]: - relation.data[self._charm.app]["alert_rules"] = json.dumps( - alert_rules_as_dict, - sort_keys=True, # sort, to prevent unnecessary relation_changed events - ) - - -class MetricsEndpointAggregator(Object): - """Aggregate metrics from multiple scrape targets. - - `MetricsEndpointAggregator` collects scrape target information from one - or more related charms and forwards this to a `MetricsEndpointConsumer` - charm, which may be in a different Juju model. However, it is - essential that `MetricsEndpointAggregator` itself resides in the same - model as its scrape targets, as this is currently the only way to - ensure in Juju that the `MetricsEndpointAggregator` will be able to - determine the model name and uuid of the scrape targets. - - `MetricsEndpointAggregator` should be used in place of - `MetricsEndpointProvider` in the following two use cases: - - 1. Integrating one or more scrape targets that do not support the - `prometheus_scrape` interface. - - 2. Integrating one or more scrape targets through cross model - relations. Although the [Scrape Config Operator](https://charmhub.io/cos-configuration-k8s) - may also be used for the purpose of supporting cross model - relations. - - Using `MetricsEndpointAggregator` to build a Prometheus charm client - only requires instantiating it. Instantiating - `MetricsEndpointAggregator` is similar to `MetricsEndpointProvider` except - that it requires specifying the names of three relations: the - relation with scrape targets, the relation for alert rules, and - that with the Prometheus charms. For example - - ```python - self._aggregator = MetricsEndpointAggregator( - self, - { - "prometheus": "monitoring", - "scrape_target": "prometheus-target", - "alert_rules": "prometheus-rules" - } - ) - ``` - - `MetricsEndpointAggregator` assumes that each unit of a scrape target - sets in its unit-level relation data two entries with keys - "hostname" and "port". If it is required to integrate with charms - that do not honor these assumptions, it is always possible to - derive from `MetricsEndpointAggregator` overriding the `_get_targets()` - method, which is responsible for aggregating the unit name, host - address ("hostname") and port of the scrape target. - `MetricsEndpointAggregator` also assumes that each unit of a - scrape target sets in its unit-level relation data a key named - "groups". The value of this key is expected to be the string - representation of list of Prometheus Alert rules in YAML format. - An example of a single such alert rule is - - ```yaml - - alert: HighRequestLatency - expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 - for: 10m - labels: - severity: page - annotations: - summary: High request latency - ``` - - Once again if it is required to integrate with charms that do not - honour these assumptions about alert rules then an object derived - from `MetricsEndpointAggregator` may be used by overriding the - `_get_alert_rules()` method. - - `MetricsEndpointAggregator` ensures that Prometheus scrape job - specifications and alert rules are annotated with Juju topology - information, just like `MetricsEndpointProvider` and - `MetricsEndpointConsumer` do. - - By default, `MetricsEndpointAggregator` ensures that Prometheus - "instance" labels refer to Juju topology. This ensures that - instance labels are stable over unit recreation. While it is not - advisable to change this option, if required it can be done by - setting the "relabel_instance" keyword argument to `False` when - constructing an aggregator object. - """ - - _stored = StoredState() - - def __init__( - self, - charm, - relation_names: Optional[dict] = None, - relabel_instance=True, - resolve_addresses=False, - ): - """Construct a `MetricsEndpointAggregator`. - - Args: - charm: a `CharmBase` object that manages this - `MetricsEndpointAggregator` object. Typically, this is - `self` in the instantiating class. - relation_names: a dictionary with three keys. The value - of the "scrape_target" and "alert_rules" keys are - the relation names over which scrape job and alert rule - information is gathered by this `MetricsEndpointAggregator`. - And the value of the "prometheus" key is the name of - the relation with a `MetricsEndpointConsumer` such as - the Prometheus charm. - relabel_instance: A boolean flag indicating if Prometheus - scrape job "instance" labels must refer to Juju Topology. - resolve_addresses: A boolean flag indiccating if the aggregator - should attempt to perform DNS lookups of targets and append - a `dns_name` label - """ - self._charm = charm - - relation_names = relation_names or {} - - self._prometheus_relation = relation_names.get( - "prometheus", "downstream-prometheus-scrape" - ) - self._target_relation = relation_names.get("scrape_target", "prometheus-target") - self._alert_rules_relation = relation_names.get("alert_rules", "prometheus-rules") - - super().__init__(charm, self._prometheus_relation) - self._stored.set_default(jobs=[], alert_rules=[]) - - self._relabel_instance = relabel_instance - self._resolve_addresses = resolve_addresses - - # manage Prometheus charm relation events - prometheus_events = self._charm.on[self._prometheus_relation] - self.framework.observe(prometheus_events.relation_joined, self._set_prometheus_data) - - # manage list of Prometheus scrape jobs from related scrape targets - target_events = self._charm.on[self._target_relation] - self.framework.observe(target_events.relation_changed, self._on_prometheus_targets_changed) - self.framework.observe( - target_events.relation_departed, self._on_prometheus_targets_departed - ) - - # manage alert rules for Prometheus from related scrape targets - alert_rule_events = self._charm.on[self._alert_rules_relation] - self.framework.observe(alert_rule_events.relation_changed, self._on_alert_rules_changed) - self.framework.observe(alert_rule_events.relation_departed, self._on_alert_rules_departed) - - def _set_prometheus_data(self, event): - """Ensure every new Prometheus instances is updated. - - Any time a new Prometheus unit joins the relation with - `MetricsEndpointAggregator`, that Prometheus unit is provided - with the complete set of existing scrape jobs and alert rules. - """ - if not self._charm.unit.is_leader(): - return - - jobs = [] + _type_convert_stored( - self._stored.jobs # pyright: ignore - ) # list of scrape jobs, one per relation - for relation in self.model.relations[self._target_relation]: - targets = self._get_targets(relation) - if targets and relation.app: - jobs.append(self._static_scrape_job(targets, relation.app.name)) - - groups = [] + _type_convert_stored( - self._stored.alert_rules # pyright: ignore - ) # list of alert rule groups - for relation in self.model.relations[self._alert_rules_relation]: - unit_rules = self._get_alert_rules(relation) - if unit_rules and relation.app: - appname = relation.app.name - rules = self._label_alert_rules(unit_rules, appname) - group = {"name": self.group_name(appname), "rules": rules} - groups.append(group) - - event.relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) - event.relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups}) - - def _on_prometheus_targets_changed(self, event): - """Update scrape jobs in response to scrape target changes. - - When there is any change in relation data with any scrape - target, the Prometheus scrape job, for that specific target is - updated. - """ - targets = self._get_targets(event.relation) - if not targets: - return - - # new scrape job for the relation that has changed - self.set_target_job_data(targets, event.relation.app.name) - - def set_target_job_data(self, targets: dict, app_name: str, **kwargs) -> None: - """Update scrape jobs in response to scrape target changes. - - When there is any change in relation data with any scrape - target, the Prometheus scrape job, for that specific target is - updated. Additionally, if this method is called manually, do the - same. - - Args: - targets: a `dict` containing target information - app_name: a `str` identifying the application - kwargs: a `dict` of the extra arguments passed to the function - """ - if not self._charm.unit.is_leader(): - return - - # new scrape job for the relation that has changed - updated_job = self._static_scrape_job(targets, app_name, **kwargs) - - for relation in self.model.relations[self._prometheus_relation]: - jobs = json.loads(relation.data[self._charm.app].get("scrape_jobs", "[]")) - # list of scrape jobs that have not changed - jobs = [job for job in jobs if updated_job["job_name"] != job["job_name"]] - jobs.append(updated_job) - relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) - - if not _type_convert_stored(self._stored.jobs) == jobs: # pyright: ignore - self._stored.jobs = jobs - - def _on_prometheus_targets_departed(self, event): - """Remove scrape jobs when a target departs. - - Any time a scrape target departs, any Prometheus scrape job - associated with that specific scrape target is removed. - """ - job_name = self._job_name(event.relation.app.name) - unit_name = event.unit.name - self.remove_prometheus_jobs(job_name, unit_name) - - def remove_prometheus_jobs(self, job_name: str, unit_name: Optional[str] = ""): - """Given a job name and unit name, remove scrape jobs associated. - - The `unit_name` parameter is used for automatic, relation data bag-based - generation, where the unit name in labels can be used to ensure that jobs with - similar names (which are generated via the app name when scanning relation data - bags) are not accidentally removed, as their unit name labels will differ. - For NRPE, the job name is calculated from an ID sent via the NRPE relation, and is - sufficient to uniquely identify the target. - """ - if not self._charm.unit.is_leader(): - return - - for relation in self.model.relations[self._prometheus_relation]: - jobs = json.loads(relation.data[self._charm.app].get("scrape_jobs", "[]")) - if not jobs: - continue - - changed_job = [j for j in jobs if j.get("job_name") == job_name] - if not changed_job: - continue - changed_job = changed_job[0] - - # list of scrape jobs that have not changed - jobs = [job for job in jobs if job.get("job_name") != job_name] - - # list of scrape jobs for units of the same application that still exist - configs_kept = [ - config - for config in changed_job["static_configs"] # type: ignore - if config.get("labels", {}).get("juju_unit") != unit_name - ] - - if configs_kept: - changed_job["static_configs"] = configs_kept # type: ignore - jobs.append(changed_job) - - relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) - - if not _type_convert_stored(self._stored.jobs) == jobs: # pyright: ignore - self._stored.jobs = jobs - - def _job_name(self, appname) -> str: - """Construct a scrape job name. - - Each relation has its own unique scrape job name. All units in - the relation are scraped as part of the same scrape job. - - Args: - appname: string name of a related application. - - Returns: - a string Prometheus scrape job name for the application. - """ - return "juju_{}_{}_{}_prometheus_scrape".format( - self.model.name, self.model.uuid[:7], appname - ) - - def _get_targets(self, relation) -> dict: - """Fetch scrape targets for a relation. - - Scrape target information is returned for each unit in the - relation. This information contains the unit name, network - hostname (or address) for that unit, and port on which a - metrics endpoint is exposed in that unit. - - Args: - relation: an `ops.model.Relation` object for which scrape - targets are required. - - Returns: - a dictionary whose keys are names of the units in the - relation. There values associated with each key is itself - a dictionary of the form - ``` - {"hostname": hostname, "port": port} - ``` - """ - targets = {} - for unit in relation.units: - port = relation.data[unit].get("port", 80) - hostname = relation.data[unit].get("hostname") - if hostname: - targets.update({unit.name: {"hostname": hostname, "port": port}}) - - return targets - - def _static_scrape_job(self, targets, application_name, **kwargs) -> dict: - """Construct a static scrape job for an application. - - Args: - targets: a dictionary providing hostname and port for all - scrape target. The keys of this dictionary are unit - names. Values corresponding to these keys are - themselves a dictionary with keys "hostname" and - "port". - application_name: a string name of the application for - which this static scrape job is being constructed. - kwargs: a `dict` of the extra arguments passed to the function - - Returns: - A dictionary corresponding to a Prometheus static scrape - job configuration for one application. The returned - dictionary may be transformed into YAML and appended to - the list of any existing list of Prometheus static configs. - """ - juju_model = self.model.name - juju_model_uuid = self.model.uuid - - job = { - "job_name": self._job_name(application_name), - "static_configs": [ - { - "targets": ["{}:{}".format(target["hostname"], target["port"])], - "labels": { - "juju_model": juju_model, - "juju_model_uuid": juju_model_uuid, - "juju_application": application_name, - "juju_unit": unit_name, - "host": target["hostname"], - # Expanding this will merge the dicts and replace the - # topology labels if any were present/found - **self._static_config_extra_labels(target), - }, - } - for unit_name, target in targets.items() - ], - "relabel_configs": self._relabel_configs + kwargs.get("relabel_configs", []), - } - job.update(kwargs.get("updates", {})) - - return job - - def _static_config_extra_labels(self, target: Dict[str, str]) -> Dict[str, str]: - """Build a list of extra static config parameters, if specified.""" - extra_info = {} - - if self._resolve_addresses: - try: - dns_name = socket.gethostbyaddr(target["hostname"])[0] - except OSError: - logger.debug("Could not perform DNS lookup for %s", target["hostname"]) - dns_name = target["hostname"] - extra_info["dns_name"] = dns_name - - return extra_info - - @property - def _relabel_configs(self) -> list: - """Create Juju topology relabeling configuration. - - Using Juju topology for instance labels ensures that these - labels are stable across unit recreation. - - Returns: - a list of Prometheus relabeling configurations. Each item in - this list is one relabel configuration. - """ - return ( - [ - { - "source_labels": [ - "juju_model", - "juju_model_uuid", - "juju_application", - "juju_unit", - ], - "separator": "_", - "target_label": "instance", - "regex": "(.*)", - } - ] - if self._relabel_instance - else [] - ) - - def _on_alert_rules_changed(self, event): - """Update alert rules in response to scrape target changes. - - When there is any change in alert rule relation data for any - scrape target, the list of alert rules for that specific - target is updated. - """ - unit_rules = self._get_alert_rules(event.relation) - if not unit_rules: - return - - app_name = event.relation.app.name - self.set_alert_rule_data(app_name, unit_rules) - - def set_alert_rule_data(self, name: str, unit_rules: dict, label_rules: bool = True) -> None: - """Update alert rule data. - - The unit rules should be a dict, which is has additional Juju topology labels added. For - rules generated by the NRPE exporter, they are pre-labeled so lookups can be performed. - """ - if not self._charm.unit.is_leader(): - return - - if label_rules: - rules = self._label_alert_rules(unit_rules, name) - else: - rules = [unit_rules] - updated_group = {"name": self.group_name(name), "rules": rules} - - for relation in self.model.relations[self._prometheus_relation]: - alert_rules = json.loads(relation.data[self._charm.app].get("alert_rules", "{}")) - groups = alert_rules.get("groups", []) - # list of alert rule groups that have not changed - for group in groups: - if group["name"] == updated_group["name"]: - group["rules"] = [r for r in group["rules"] if r not in updated_group["rules"]] - group["rules"].extend(updated_group["rules"]) - - if updated_group["name"] not in [g["name"] for g in groups]: - groups.append(updated_group) - relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups}) - - if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore - self._stored.alert_rules = groups - - def _on_alert_rules_departed(self, event): - """Remove alert rules for departed targets. - - Any time a scrape target departs any alert rules associated - with that specific scrape target is removed. - """ - group_name = self.group_name(event.relation.app.name) - unit_name = event.unit.name - self.remove_alert_rules(group_name, unit_name) - - def remove_alert_rules(self, group_name: str, unit_name: str) -> None: - """Remove an alert rule group from relation data.""" - if not self._charm.unit.is_leader(): - return - - for relation in self.model.relations[self._prometheus_relation]: - alert_rules = json.loads(relation.data[self._charm.app].get("alert_rules", "{}")) - if not alert_rules: - continue - - groups = alert_rules.get("groups", []) - if not groups: - continue - - changed_group = [group for group in groups if group["name"] == group_name] - if not changed_group: - continue - changed_group = changed_group[0] - - # list of alert rule groups that have not changed - groups = [group for group in groups if group["name"] != group_name] - - # list of alert rules not associated with departing unit - rules_kept = [ - rule - for rule in changed_group.get("rules") # type: ignore - if rule.get("labels").get("juju_unit") != unit_name - ] - - if rules_kept: - changed_group["rules"] = rules_kept # type: ignore - groups.append(changed_group) - - relation.data[self._charm.app]["alert_rules"] = ( - json.dumps({"groups": groups}) if groups else "{}" - ) - - if not _type_convert_stored(self._stored.alert_rules) == groups: # pyright: ignore - self._stored.alert_rules = groups - - def _get_alert_rules(self, relation) -> dict: - """Fetch alert rules for a relation. - - Each unit of the related scrape target may have its own - associated alert rules. Alert rules for all units are returned - indexed by unit name. - - Args: - relation: an `ops.model.Relation` object for which alert - rules are required. - - Returns: - a dictionary whose keys are names of the units in the - relation. There values associated with each key is a list - of alert rules. Each rule is in dictionary format. The - structure "rule dictionary" corresponds to single - Prometheus alert rule. - """ - rules = {} - for unit in relation.units: - unit_rules = yaml.safe_load(relation.data[unit].get("groups", "")) - if unit_rules: - rules.update({unit.name: unit_rules}) - - return rules - - def group_name(self, unit_name: str) -> str: - """Construct name for an alert rule group. - - Each unit in a relation may define its own alert rules. All - rules, for all units in a relation are grouped together and - given a single alert rule group name. - - Args: - unit_name: string name of a related application. - - Returns: - a string Prometheus alert rules group name for the unit. - """ - unit_name = re.sub(r"/", "_", unit_name) - return "juju_{}_{}_{}_alert_rules".format(self.model.name, self.model.uuid[:7], unit_name) - - def _label_alert_rules(self, unit_rules, app_name: str) -> list: - """Apply juju topology labels to alert rules. - - Args: - unit_rules: a list of alert rules, where each rule is in - dictionary format. - app_name: a string name of the application to which the - alert rules belong. - - Returns: - a list of alert rules with Juju topology labels. - """ - labeled_rules = [] - for unit_name, rules in unit_rules.items(): - for rule in rules: - # the new JujuTopology removed this, so build it up by hand - matchers = { - "juju_{}".format(k): v - for k, v in JujuTopology(self.model.name, self.model.uuid, app_name, unit_name) - .as_dict(excluded_keys=["charm_name"]) - .items() - } - rule["labels"].update(matchers.items()) - labeled_rules.append(rule) - - return labeled_rules - - -class CosTool: - """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" - - _path = None - _disabled = False - - def __init__(self, charm): - self._charm = charm - - @property - def path(self): - """Lazy lookup of the path of cos-tool.""" - if self._disabled: - return None - if not self._path: - self._path = self._get_tool_path() - if not self._path: - logger.debug("Skipping injection of juju topology as label matchers") - self._disabled = True - return self._path - - def apply_label_matchers(self, rules) -> dict: - """Will apply label matchers to the expression of all alerts in all supplied groups.""" - if not self.path: - return rules - for group in rules["groups"]: - rules_in_group = group.get("rules", []) - for rule in rules_in_group: - topology = {} - # if the user for some reason has provided juju_unit, we'll need to honor it - # in most cases, however, this will be empty - for label in [ - "juju_model", - "juju_model_uuid", - "juju_application", - "juju_charm", - "juju_unit", - ]: - if label in rule["labels"]: - topology[label] = rule["labels"][label] - - rule["expr"] = self.inject_label_matchers(rule["expr"], topology) - return rules - - def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: - """Will validate correctness of alert rules, returning a boolean and any errors.""" - if not self.path: - logger.debug("`cos-tool` unavailable. Not validating alert correctness.") - return True, "" - - with tempfile.TemporaryDirectory() as tmpdir: - rule_path = Path(tmpdir + "/validate_rule.yaml") - rule_path.write_text(yaml.dump(rules)) - - args = [str(self.path), "validate", str(rule_path)] - # noinspection PyBroadException - try: - self._exec(args) - return True, "" - except subprocess.CalledProcessError as e: - logger.debug("Validating the rules failed: %s", e.output) - return False, ", ".join( - [ - line - for line in e.output.decode("utf8").splitlines() - if "error validating" in line - ] - ) - - def validate_scrape_jobs(self, jobs: list) -> bool: - """Validate scrape jobs using cos-tool.""" - if not self.path: - logger.debug("`cos-tool` unavailable. Not validating scrape jobs.") - return True - conf = {"scrape_configs": jobs} - with tempfile.NamedTemporaryFile() as tmpfile: - with open(tmpfile.name, "w") as f: - f.write(yaml.safe_dump(conf)) - try: - self._exec([str(self.path), "validate-config", tmpfile.name]) - except subprocess.CalledProcessError as e: - logger.error("Validating scrape jobs failed: {}".format(e.output)) - raise - return True - - def inject_label_matchers(self, expression, topology) -> str: - """Add label matchers to an expression.""" - if not topology: - return expression - if not self.path: - logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) - return expression - args = [str(self.path), "transform"] - args.extend( - ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] - ) - - args.extend(["{}".format(expression)]) - # noinspection PyBroadException - try: - return self._exec(args) - except subprocess.CalledProcessError as e: - logger.debug('Applying the expression failed: "%s", falling back to the original', e) - return expression - - def _get_tool_path(self) -> Optional[Path]: - arch = platform.machine() - arch = "amd64" if arch == "x86_64" else arch - res = "cos-tool-{}".format(arch) - try: - path = Path(res).resolve() - path.chmod(0o777) - return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: - logger.debug('Could not locate cos-tool at: "{}"'.format(res)) - return None - - def _exec(self, cmd) -> str: - result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - return result.stdout.decode("utf-8").strip() diff --git a/charm/lib/charms/traefik_k8s/v2/ingress.py b/charm/lib/charms/traefik_k8s/v2/ingress.py deleted file mode 100644 index 31028e9..0000000 --- a/charm/lib/charms/traefik_k8s/v2/ingress.py +++ /dev/null @@ -1,769 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. - -r"""# Interface Library for ingress. - -This library wraps relation endpoints using the `ingress` interface -and provides a Python API for both requesting and providing per-application -ingress, with load-balancing occurring across all units. - -## Getting Started - -To get started using the library, you just need to fetch the library using `charmcraft`. - -```shell -cd some-charm -charmcraft fetch-lib charms.traefik_k8s.v1.ingress -``` - -In the `metadata.yaml` of the charm, add the following: - -```yaml -requires: - ingress: - interface: ingress - limit: 1 -``` - -Then, to initialise the library: - -```python -from charms.traefik_k8s.v2.ingress import (IngressPerAppRequirer, - IngressPerAppReadyEvent, IngressPerAppRevokedEvent) - -class SomeCharm(CharmBase): - def __init__(self, *args): - # ... - self.ingress = IngressPerAppRequirer(self, port=80) - # The following event is triggered when the ingress URL to be used - # by this deployment of the `SomeCharm` is ready (or changes). - self.framework.observe( - self.ingress.on.ready, self._on_ingress_ready - ) - self.framework.observe( - self.ingress.on.revoked, self._on_ingress_revoked - ) - - def _on_ingress_ready(self, event: IngressPerAppReadyEvent): - logger.info("This app's ingress URL: %s", event.url) - - def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): - logger.info("This app no longer has ingress") -""" -import ipaddress -import json -import logging -import socket -import typing -from dataclasses import dataclass -from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union - -import pydantic -from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent -from ops.framework import EventSource, Object, ObjectEvents, StoredState -from ops.model import ModelError, Relation, Unit -from pydantic import AnyHttpUrl, BaseModel, Field, validator - -# The unique Charmhub library identifier, never change it -LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" - -# Increment this major API version when introducing breaking changes -LIBAPI = 2 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 8 - -PYDEPS = ["pydantic<2.0"] - -DEFAULT_RELATION_NAME = "ingress" -RELATION_INTERFACE = "ingress" - -log = logging.getLogger(__name__) -BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} - - -class DatabagModel(BaseModel): - """Base databag model.""" - - class Config: - """Pydantic config.""" - - allow_population_by_field_name = True - """Allow instantiating this class by field name (instead of forcing alias).""" - - _NEST_UNDER = None - - @classmethod - def load(cls, databag: MutableMapping): - """Load this model from a Juju databag.""" - if cls._NEST_UNDER: - return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) - - try: - data = {k: json.loads(v) for k, v in databag.items() if k not in BUILTIN_JUJU_KEYS} - except json.JSONDecodeError as e: - msg = f"invalid databag contents: expecting json. {databag}" - log.error(msg) - raise DataValidationError(msg) from e - - try: - return cls.parse_raw(json.dumps(data)) # type: ignore - except pydantic.ValidationError as e: - msg = f"failed to validate databag: {databag}" - log.error(msg, exc_info=True) - raise DataValidationError(msg) from e - - def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): - """Write the contents of this model to Juju databag. - - :param databag: the databag to write the data to. - :param clear: ensure the databag is cleared before writing it. - """ - if clear and databag: - databag.clear() - - if databag is None: - databag = {} - - if self._NEST_UNDER: - databag[self._NEST_UNDER] = self.json() - - dct = self.dict() - for key, field in self.__fields__.items(): # type: ignore - value = dct[key] - databag[field.alias or key] = json.dumps(value) - - return databag - - -# todo: import these models from charm-relation-interfaces/ingress/v2 instead of redeclaring them -class IngressUrl(BaseModel): - """Ingress url schema.""" - - url: AnyHttpUrl - - -class IngressProviderAppData(DatabagModel): - """Ingress application databag schema.""" - - ingress: IngressUrl - - -class ProviderSchema(BaseModel): - """Provider schema for Ingress.""" - - app: IngressProviderAppData - - -class IngressRequirerAppData(DatabagModel): - """Ingress requirer application databag model.""" - - model: str = Field(description="The model the application is in.") - name: str = Field(description="the name of the app requesting ingress.") - port: int = Field(description="The port the app wishes to be exposed.") - - # fields on top of vanilla 'ingress' interface: - strip_prefix: Optional[bool] = Field( - description="Whether to strip the prefix from the ingress url.", alias="strip-prefix" - ) - redirect_https: Optional[bool] = Field( - description="Whether to redirect http traffic to https.", alias="redirect-https" - ) - - scheme: Optional[str] = Field( - default="http", description="What scheme to use in the generated ingress url" - ) - - @validator("scheme", pre=True) - def validate_scheme(cls, scheme): # noqa: N805 # pydantic wants 'cls' as first arg - """Validate scheme arg.""" - if scheme not in {"http", "https", "h2c"}: - raise ValueError("invalid scheme: should be one of `http|https|h2c`") - return scheme - - @validator("port", pre=True) - def validate_port(cls, port): # noqa: N805 # pydantic wants 'cls' as first arg - """Validate port.""" - assert isinstance(port, int), type(port) - assert 0 < port < 65535, "port out of TCP range" - return port - - -class IngressRequirerUnitData(DatabagModel): - """Ingress requirer unit databag model.""" - - host: str = Field(description="Hostname at which the unit is reachable.") - ip: Optional[str] = Field( - description="IP at which the unit is reachable, " - "IP can only be None if the IP information can't be retrieved from juju." - ) - - @validator("host", pre=True) - def validate_host(cls, host): # noqa: N805 # pydantic wants 'cls' as first arg - """Validate host.""" - assert isinstance(host, str), type(host) - return host - - @validator("ip", pre=True) - def validate_ip(cls, ip): # noqa: N805 # pydantic wants 'cls' as first arg - """Validate ip.""" - if ip is None: - return None - if not isinstance(ip, str): - raise TypeError(f"got ip of type {type(ip)} instead of expected str") - try: - ipaddress.IPv4Address(ip) - return ip - except ipaddress.AddressValueError: - pass - try: - ipaddress.IPv6Address(ip) - return ip - except ipaddress.AddressValueError: - raise ValueError(f"{ip!r} is not a valid ip address") - - -class RequirerSchema(BaseModel): - """Requirer schema for Ingress.""" - - app: IngressRequirerAppData - unit: IngressRequirerUnitData - - -class IngressError(RuntimeError): - """Base class for custom errors raised by this library.""" - - -class NotReadyError(IngressError): - """Raised when a relation is not ready.""" - - -class DataValidationError(IngressError): - """Raised when data validation fails on IPU relation data.""" - - -class _IngressPerAppBase(Object): - """Base class for IngressPerUnit interface classes.""" - - def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME): - super().__init__(charm, relation_name) - - self.charm: CharmBase = charm - self.relation_name = relation_name - self.app = self.charm.app - self.unit = self.charm.unit - - observe = self.framework.observe - rel_events = charm.on[relation_name] - observe(rel_events.relation_created, self._handle_relation) - observe(rel_events.relation_joined, self._handle_relation) - observe(rel_events.relation_changed, self._handle_relation) - observe(rel_events.relation_departed, self._handle_relation) - observe(rel_events.relation_broken, self._handle_relation_broken) - observe(charm.on.leader_elected, self._handle_upgrade_or_leader) # type: ignore - observe(charm.on.upgrade_charm, self._handle_upgrade_or_leader) # type: ignore - - @property - def relations(self): - """The list of Relation instances associated with this endpoint.""" - return list(self.charm.model.relations[self.relation_name]) - - def _handle_relation(self, event): - """Subclasses should implement this method to handle a relation update.""" - pass - - def _handle_relation_broken(self, event): - """Subclasses should implement this method to handle a relation breaking.""" - pass - - def _handle_upgrade_or_leader(self, event): - """Subclasses should implement this method to handle upgrades or leadership change.""" - pass - - -class _IPAEvent(RelationEvent): - __args__: Tuple[str, ...] = () - __optional_kwargs__: Dict[str, Any] = {} - - @classmethod - def __attrs__(cls): - return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) - - def __init__(self, handle, relation, *args, **kwargs): - super().__init__(handle, relation) - - if not len(self.__args__) == len(args): - raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) - - for attr, obj in zip(self.__args__, args): - setattr(self, attr, obj) - for attr, default in self.__optional_kwargs__.items(): - obj = kwargs.get(attr, default) - setattr(self, attr, obj) - - def snapshot(self): - dct = super().snapshot() - for attr in self.__attrs__(): - obj = getattr(self, attr) - try: - dct[attr] = obj - except ValueError as e: - raise ValueError( - "cannot automagically serialize {}: " - "override this method and do it " - "manually.".format(obj) - ) from e - - return dct - - def restore(self, snapshot) -> None: - super().restore(snapshot) - for attr, obj in snapshot.items(): - setattr(self, attr, obj) - - -class IngressPerAppDataProvidedEvent(_IPAEvent): - """Event representing that ingress data has been provided for an app.""" - - __args__ = ("name", "model", "hosts", "strip_prefix", "redirect_https") - - if typing.TYPE_CHECKING: - name: Optional[str] = None - model: Optional[str] = None - # sequence of hostname, port dicts - hosts: Sequence["IngressRequirerUnitData"] = () - strip_prefix: bool = False - redirect_https: bool = False - - -class IngressPerAppDataRemovedEvent(RelationEvent): - """Event representing that ingress data has been removed for an app.""" - - -class IngressPerAppProviderEvents(ObjectEvents): - """Container for IPA Provider events.""" - - data_provided = EventSource(IngressPerAppDataProvidedEvent) - data_removed = EventSource(IngressPerAppDataRemovedEvent) - - -@dataclass -class IngressRequirerData: - """Data exposed by the ingress requirer to the provider.""" - - app: "IngressRequirerAppData" - units: List["IngressRequirerUnitData"] - - -class TlsProviderType(typing.Protocol): - """Placeholder.""" - - @property - def enabled(self) -> bool: # type: ignore - """Placeholder.""" - - -class IngressPerAppProvider(_IngressPerAppBase): - """Implementation of the provider of ingress.""" - - on = IngressPerAppProviderEvents() # type: ignore - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - ): - """Constructor for IngressPerAppProvider. - - Args: - charm: The charm that is instantiating the instance. - relation_name: The name of the relation endpoint to bind to - (defaults to "ingress"). - """ - super().__init__(charm, relation_name) - - def _handle_relation(self, event): - # created, joined or changed: if remote side has sent the required data: - # notify listeners. - if self.is_ready(event.relation): - data = self.get_data(event.relation) - self.on.data_provided.emit( # type: ignore - event.relation, - data.app.name, - data.app.model, - [unit.dict() for unit in data.units], - data.app.strip_prefix or False, - data.app.redirect_https or False, - ) - - def _handle_relation_broken(self, event): - self.on.data_removed.emit(event.relation) # type: ignore - - def wipe_ingress_data(self, relation: Relation): - """Clear ingress data from relation.""" - assert self.unit.is_leader(), "only leaders can do this" - try: - relation.data - except ModelError as e: - log.warning( - "error {} accessing relation data for {!r}. " - "Probably a ghost of a dead relation is still " - "lingering around.".format(e, relation.name) - ) - return - del relation.data[self.app]["ingress"] - - def _get_requirer_units_data(self, relation: Relation) -> List["IngressRequirerUnitData"]: - """Fetch and validate the requirer's app databag.""" - out: List["IngressRequirerUnitData"] = [] - - unit: Unit - for unit in relation.units: - databag = relation.data[unit] - try: - data = IngressRequirerUnitData.load(databag) - out.append(data) - except pydantic.ValidationError: - log.info(f"failed to validate remote unit data for {unit}") - raise - return out - - @staticmethod - def _get_requirer_app_data(relation: Relation) -> "IngressRequirerAppData": - """Fetch and validate the requirer's app databag.""" - app = relation.app - if app is None: - raise NotReadyError(relation) - - databag = relation.data[app] - return IngressRequirerAppData.load(databag) - - def get_data(self, relation: Relation) -> IngressRequirerData: - """Fetch the remote (requirer) app and units' databags.""" - try: - return IngressRequirerData( - self._get_requirer_app_data(relation), self._get_requirer_units_data(relation) - ) - except (pydantic.ValidationError, DataValidationError) as e: - raise DataValidationError("failed to validate ingress requirer data") from e - - def is_ready(self, relation: Optional[Relation] = None): - """The Provider is ready if the requirer has sent valid data.""" - if not relation: - return any(map(self.is_ready, self.relations)) - - try: - self.get_data(relation) - except (DataValidationError, NotReadyError) as e: - log.debug("Provider not ready; validation error encountered: %s" % str(e)) - return False - return True - - def _published_url(self, relation: Relation) -> Optional["IngressProviderAppData"]: - """Fetch and validate this app databag; return the ingress url.""" - if not self.is_ready(relation) or not self.unit.is_leader(): - # Handle edge case where remote app name can be missing, e.g., - # relation_broken events. - # Also, only leader units can read own app databags. - # FIXME https://github.com/canonical/traefik-k8s-operator/issues/34 - return None - - # fetch the provider's app databag - databag = relation.data[self.app] - if not databag.get("ingress"): - raise NotReadyError("This application did not `publish_url` yet.") - - return IngressProviderAppData.load(databag) - - def publish_url(self, relation: Relation, url: str): - """Publish to the app databag the ingress url.""" - ingress_url = {"url": url} - IngressProviderAppData.parse_obj({"ingress": ingress_url}).dump(relation.data[self.app]) - - @property - def proxied_endpoints(self) -> Dict[str, str]: - """Returns the ingress settings provided to applications by this IngressPerAppProvider. - - For example, when this IngressPerAppProvider has provided the - `http://foo.bar/my-model.my-app` URL to the my-app application, the returned dictionary - will be: - - ``` - { - "my-app": { - "url": "http://foo.bar/my-model.my-app" - } - } - ``` - """ - results = {} - - for ingress_relation in self.relations: - if not ingress_relation.app: - log.warning( - f"no app in relation {ingress_relation} when fetching proxied endpoints: skipping" - ) - continue - try: - ingress_data = self._published_url(ingress_relation) - except NotReadyError: - log.warning( - f"no published url found in {ingress_relation}: " - f"traefik didn't publish_url yet to this relation." - ) - continue - - if not ingress_data: - log.warning(f"relation {ingress_relation} not ready yet: try again in some time.") - continue - - results[ingress_relation.app.name] = ingress_data.ingress.dict() - return results - - -class IngressPerAppReadyEvent(_IPAEvent): - """Event representing that ingress for an app is ready.""" - - __args__ = ("url",) - if typing.TYPE_CHECKING: - url: Optional[str] = None - - -class IngressPerAppRevokedEvent(RelationEvent): - """Event representing that ingress for an app has been revoked.""" - - -class IngressPerAppRequirerEvents(ObjectEvents): - """Container for IPA Requirer events.""" - - ready = EventSource(IngressPerAppReadyEvent) - revoked = EventSource(IngressPerAppRevokedEvent) - - -class IngressPerAppRequirer(_IngressPerAppBase): - """Implementation of the requirer of the ingress relation.""" - - on = IngressPerAppRequirerEvents() # type: ignore - - # used to prevent spurious urls to be sent out if the event we're currently - # handling is a relation-broken one. - _stored = StoredState() - - def __init__( - self, - charm: CharmBase, - relation_name: str = DEFAULT_RELATION_NAME, - *, - host: Optional[str] = None, - ip: Optional[str] = None, - port: Optional[int] = None, - strip_prefix: bool = False, - redirect_https: bool = False, - # fixme: this is horrible UX. - # shall we switch to manually calling provide_ingress_requirements with all args when ready? - scheme: Union[Callable[[], str], str] = lambda: "http", - ): - """Constructor for IngressRequirer. - - The request args can be used to specify the ingress properties when the - instance is created. If any are set, at least `port` is required, and - they will be sent to the ingress provider as soon as it is available. - All request args must be given as keyword args. - - Args: - charm: the charm that is instantiating the library. - relation_name: the name of the relation endpoint to bind to (defaults to `ingress`); - relation must be of interface type `ingress` and have "limit: 1") - host: Hostname to be used by the ingress provider to address the requiring - application; if unspecified, the default Kubernetes service name will be used. - ip: Alternative addressing method other than host to be used by the ingress provider; - if unspecified, binding address from juju network API will be used. - strip_prefix: configure Traefik to strip the path prefix. - redirect_https: redirect incoming requests to HTTPS. - scheme: callable returning the scheme to use when constructing the ingress url. - Or a string, if the scheme is known and stable at charm-init-time. - - Request Args: - port: the port of the service - """ - super().__init__(charm, relation_name) - self.charm: CharmBase = charm - self.relation_name = relation_name - self._strip_prefix = strip_prefix - self._redirect_https = redirect_https - self._get_scheme = scheme if callable(scheme) else lambda: scheme - - self._stored.set_default(current_url=None) # type: ignore - - # if instantiated with a port, and we are related, then - # we immediately publish our ingress data to speed up the process. - if port: - self._auto_data = host, ip, port - else: - self._auto_data = None - - def _handle_relation(self, event): - # created, joined or changed: if we have auto data: publish it - self._publish_auto_data() - - if self.is_ready(): - # Avoid spurious events, emit only when there is a NEW URL available - new_url = ( - None - if isinstance(event, RelationBrokenEvent) - else self._get_url_from_relation_data() - ) - if self._stored.current_url != new_url: # type: ignore - self._stored.current_url = new_url # type: ignore - self.on.ready.emit(event.relation, new_url) # type: ignore - - def _handle_relation_broken(self, event): - self._stored.current_url = None # type: ignore - self.on.revoked.emit(event.relation) # type: ignore - - def _handle_upgrade_or_leader(self, event): - """On upgrade/leadership change: ensure we publish the data we have.""" - self._publish_auto_data() - - def is_ready(self): - """The Requirer is ready if the Provider has sent valid data.""" - try: - return bool(self._get_url_from_relation_data()) - except DataValidationError as e: - log.debug("Requirer not ready; validation error encountered: %s" % str(e)) - return False - - def _publish_auto_data(self): - if self._auto_data: - host, ip, port = self._auto_data - self.provide_ingress_requirements(host=host, ip=ip, port=port) - - def provide_ingress_requirements( - self, - *, - scheme: Optional[str] = None, - host: Optional[str] = None, - ip: Optional[str] = None, - port: int, - ): - """Publishes the data that Traefik needs to provide ingress. - - Args: - scheme: Scheme to be used; if unspecified, use the one used by __init__. - host: Hostname to be used by the ingress provider to address the - requirer unit; if unspecified, FQDN will be used instead - ip: Alternative addressing method other than host to be used by the ingress provider. - if unspecified, binding address from juju network API will be used. - port: the port of the service (required) - """ - for relation in self.relations: - self._provide_ingress_requirements(scheme, host, ip, port, relation) - - def _provide_ingress_requirements( - self, - scheme: Optional[str], - host: Optional[str], - ip: Optional[str], - port: int, - relation: Relation, - ): - if self.unit.is_leader(): - self._publish_app_data(scheme, port, relation) - - self._publish_unit_data(host, ip, relation) - - def _publish_unit_data( - self, - host: Optional[str], - ip: Optional[str], - relation: Relation, - ): - if not host: - host = socket.getfqdn() - - if ip is None: - network_binding = self.charm.model.get_binding(relation) - if ( - network_binding is not None - and (bind_address := network_binding.network.bind_address) is not None - ): - ip = str(bind_address) - else: - log.error("failed to retrieve ip information from juju") - - unit_databag = relation.data[self.unit] - try: - IngressRequirerUnitData(host=host, ip=ip).dump(unit_databag) - except pydantic.ValidationError as e: - msg = "failed to validate unit data" - log.info(msg, exc_info=True) # log to INFO because this might be expected - raise DataValidationError(msg) from e - - def _publish_app_data( - self, - scheme: Optional[str], - port: int, - relation: Relation, - ): - # assumes leadership! - app_databag = relation.data[self.app] - - if not scheme: - # If scheme was not provided, use the one given to the constructor. - scheme = self._get_scheme() - - try: - IngressRequirerAppData( # type: ignore # pyright does not like aliases - model=self.model.name, - name=self.app.name, - scheme=scheme, - port=port, - strip_prefix=self._strip_prefix, # type: ignore # pyright does not like aliases - redirect_https=self._redirect_https, # type: ignore # pyright does not like aliases - ).dump(app_databag) - except pydantic.ValidationError as e: - msg = "failed to validate app data" - log.info(msg, exc_info=True) # log to INFO because this might be expected - raise DataValidationError(msg) from e - - @property - def relation(self): - """The established Relation instance, or None.""" - return self.relations[0] if self.relations else None - - def _get_url_from_relation_data(self) -> Optional[str]: - """The full ingress URL to reach the current unit. - - Returns None if the URL isn't available yet. - """ - relation = self.relation - if not relation or not relation.app: - return None - - # fetch the provider's app databag - try: - databag = relation.data[relation.app] - except ModelError as e: - log.debug( - f"Error {e} attempting to read remote app data; " - f"probably we are in a relation_departed hook" - ) - return None - - if not databag: # not ready yet - return None - - return str(IngressProviderAppData.load(databag).ingress.url) - - @property - def url(self) -> Optional[str]: - """The full ingress URL to reach the current unit. - - Returns None if the URL isn't available yet. - """ - data = ( - typing.cast(Optional[str], self._stored.current_url) # type: ignore - or self._get_url_from_relation_data() - ) - return data diff --git a/charm/requirements.txt b/charm/requirements.txt index 884351b..acab50e 100644 --- a/charm/requirements.txt +++ b/charm/requirements.txt @@ -1 +1 @@ -https://github.com/canonical/xiilib/archive/feat-12f-django.tar.gz +paas-app-charmer==1.* diff --git a/charm/src/actions.py b/charm/src/actions.py index 55d8c49..1357e85 100644 --- a/charm/src/actions.py +++ b/charm/src/actions.py @@ -9,7 +9,7 @@ import secrets import ops -import xiilib.django +import paas_app_charmer.django logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ class NotReadyError(Exception): class Observer(ops.Object): """Charm actions observer.""" - def __init__(self, charm: xiilib.django.Charm): + def __init__(self, charm: paas_app_charmer.django.Charm): """Initialize the observer and register actions handlers. Args: @@ -52,14 +52,13 @@ def _execute_command(self, command: list[str], event: ops.ActionEvent) -> None: Raises: ExecError: if an error occurs while executing the script """ - container = self.charm.unit.get_container(self.charm._CONTAINER_NAME) - if not container.can_connect() or not self.charm._databases.is_ready(): + if not self.charm.is_ready(): event.fail("Service not yet ready.") - process = container.exec( + process = self.charm._container.exec( ["python3", "manage.py"] + command, - working_dir=str(self.charm._BASE_DIR / "app"), - environment=self.charm.gen_env(), + working_dir=str(self.charm._workload_config.base_dir / "app"), + environment=self.charm._gen_environment(), ) try: stdout, _ = process.wait_output() diff --git a/charm/src/charm.py b/charm/src/charm.py index 1b5fdfd..d339744 100755 --- a/charm/src/charm.py +++ b/charm/src/charm.py @@ -9,7 +9,7 @@ import actions import ops -import xiilib.django +import paas_app_charmer.django logger = logging.getLogger(__name__) @@ -20,7 +20,7 @@ RSA_PATH = "/var/lib/pebble/default/.ssh/id_rsa" -class DjangoCharm(xiilib.django.Charm): +class DjangoCharm(paas_app_charmer.django.Charm): """Flask Charm service.""" def __init__(self, *args: typing.Any) -> None: @@ -37,7 +37,7 @@ def _on_config_changed(self, _event: ops.ConfigChangedEvent) -> None: """Config changed handler. Args: - event: the event triggering the handler. + _event: the event triggering the handler. """ self._copy_files() super()._on_config_changed(_event) @@ -46,22 +46,21 @@ def _on_django_app_pebble_ready(self, _event: ops.PebbleReadyEvent) -> None: """Pebble ready handler. Args: - event: the event triggering the handler. + _event: the event triggering the handler. """ self._copy_files() super()._on_django_app_pebble_ready(_event) def _copy_files(self) -> None: """Copy files needed by git.""" - container = self.unit.get_container(self._CONTAINER_NAME) - if not container.can_connect(): + if not self._container.can_connect(): return if not self.config.get("git_repo") or not self.config.get("git_ssh_key"): return hostname = self.config.get("git_repo").split("@")[1].split("/")[0] - process = container.exec(["ssh-keyscan", "-t", "rsa", hostname]) + process = self._container.exec(["ssh-keyscan", "-t", "rsa", hostname]) output, _ = process.wait_output() - container.push( + self._container.push( KNOWN_HOSTS_PATH, output, encoding="utf-8", @@ -70,7 +69,7 @@ def _copy_files(self) -> None: group=DJANGO_GROUP, permissions=0o600, ) - container.push( + self._container.push( RSA_PATH, self.config.get("git_ssh_key"), encoding="utf-8", @@ -81,11 +80,7 @@ def _copy_files(self) -> None: ) def _on_collect_app_status(self, _: ops.CollectStatusEvent) -> None: - """Handle the status changes. - - Args: - event: the event triggering the handler. - """ + """Handle the status changes.""" if not self.config.get("git_repo"): self.unit.status = ops.WaitingStatus("Config git_repo is required") return