diff --git a/lib/charms/grafana_k8s/v0/grafana_dashboard.py b/lib/charms/grafana_k8s/v0/grafana_dashboard.py index 1f1bc4f0..c20ab2b1 100644 --- a/lib/charms/grafana_k8s/v0/grafana_dashboard.py +++ b/lib/charms/grafana_k8s/v0/grafana_dashboard.py @@ -219,7 +219,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 35 +LIBPATCH = 32 logger = logging.getLogger(__name__) @@ -665,14 +665,14 @@ def _template_panels( continue if not existing_templates: datasource = panel.get("datasource") - if isinstance(datasource, str): + if type(datasource) == str: if "loki" in datasource: panel["datasource"] = "${lokids}" elif "grafana" in datasource: continue else: panel["datasource"] = "${prometheusds}" - elif isinstance(datasource, dict): + elif type(datasource) == dict: # In dashboards exported by Grafana 9, datasource type is dict dstype = datasource.get("type", "") if dstype == "loki": @@ -686,7 +686,7 @@ def _template_panels( logger.error("Unknown datasource format: skipping") continue else: - if isinstance(panel["datasource"], str): + if type(panel["datasource"]) == str: if panel["datasource"].lower() in replacements.values(): # Already a known template variable continue @@ -701,7 +701,7 @@ def _template_panels( if replacement: used_replacements.append(ds) panel["datasource"] = replacement or panel["datasource"] - elif isinstance(panel["datasource"], dict): + elif type(panel["datasource"]) == dict: dstype = panel["datasource"].get("type", "") if panel["datasource"].get("uid", "").lower() in replacements.values(): # Already a known template variable @@ -790,7 +790,7 @@ def _inject_labels(content: str, topology: dict, transformer: "CosTool") -> str: # We need to use an index so we can insert the changed element back later for panel_idx, panel in enumerate(panels): - if not isinstance(panel, dict): + if type(panel) is not dict: continue # Use the index to insert it 
back in the same location @@ -831,11 +831,11 @@ def _modify_panel(panel: dict, topology: dict, transformer: "CosTool") -> dict: if "datasource" not in panel.keys(): continue - if isinstance(panel["datasource"], str): + if type(panel["datasource"]) == str: if panel["datasource"] not in known_datasources: continue querytype = known_datasources[panel["datasource"]] - elif isinstance(panel["datasource"], dict): + elif type(panel["datasource"]) == dict: if panel["datasource"]["uid"] not in known_datasources: continue querytype = known_datasources[panel["datasource"]["uid"]] @@ -1195,7 +1195,6 @@ def _on_grafana_dashboard_relation_created(self, event: RelationCreatedEvent) -> `grafana_dashboaard` relationship is joined """ if self._charm.unit.is_leader(): - self._update_all_dashboards_from_dir() self._upset_dashboards_on_relation(event.relation) def _on_grafana_dashboard_relation_changed(self, event: RelationChangedEvent) -> None: diff --git a/lib/charms/observability_libs/v0/juju_topology.py b/lib/charms/observability_libs/v0/juju_topology.py new file mode 100644 index 00000000..a79e5d43 --- /dev/null +++ b/lib/charms/observability_libs/v0/juju_topology.py @@ -0,0 +1,301 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to use the `JujuTopology` class to +create and consume topology information from Juju in a consistent manner. + +The goal of the Juju topology is to uniquely identify a piece +of software running across any of your Juju-managed deployments. +This is achieved by combining the following four elements: + +- Model name +- Model UUID +- Application name +- Unit identifier + + +For a more in-depth description of the concept, as well as a +walk-through of its use-case in observability, see +[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) +on the Juju blog. 
+ +## Library Usage + +This library may be used to create and consume `JujuTopology` objects. +The `JujuTopology` class provides three ways to create instances: + +### Using the `from_charm` method + +Enables instantiation by supplying the charm as an argument. When +creating topology objects for the current charm, this is the recommended +approach. + +```python +topology = JujuTopology.from_charm(self) +``` + +### Using the `from_dict` method + +Allows for instantiation using a dictionary of relation data, like the +`scrape_metadata` from Prometheus or the labels of an alert rule. When +creating topology objects for remote charms, this is the recommended +approach. + +```python +scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) +topology = JujuTopology.from_dict(scrape_metadata) +``` + +### Using the class constructor + +Enables instantiation using whatever values you want. While this +is useful in some very specific cases, this is almost certainly not +what you are looking for as setting these values manually may +result in observability metrics which do not uniquely identify a +charm in order to provide accurate usage reporting, alerting, +horizontal scaling, or other use cases. 
+ +```python +topology = JujuTopology( + model="some-juju-model", + model_uuid="00000000-0000-4000-8000-000000000001", + application="fancy-juju-application", + unit="fancy-juju-application/0", + charm_name="fancy-juju-application-k8s", +) +``` + +""" +from collections import OrderedDict +from typing import Dict, List, Optional +from uuid import UUID + +# The unique Charmhub library identifier, never change it +LIBID = "bced1658f20f49d28b88f61f83c2d232" + +LIBAPI = 0 +LIBPATCH = 6 + + +class InvalidUUIDError(Exception): + """Invalid UUID was provided.""" + + def __init__(self, uuid: str): + self.message = "'{}' is not a valid UUID.".format(uuid) + super().__init__(self.message) + + +class JujuTopology: + """JujuTopology is used for storing, generating and formatting juju topology information. + + DEPRECATED: This class is deprecated. Use `pip install cosl` and + `from cosl.juju_topology import JujuTopology` instead. + """ + + def __init__( + self, + model: str, + model_uuid: str, + application: str, + unit: Optional[str] = None, + charm_name: Optional[str] = None, + ): + """Build a JujuTopology object. + + A `JujuTopology` object is used for storing and transforming + Juju topology information. This information is used to + annotate Prometheus scrape jobs and alert rules. Such + annotation when applied to scrape jobs helps in identifying + the source of the scraped metrics. On the other hand when + applied to alert rules topology information ensures that + evaluation of alert expressions is restricted to the source + (charm) from which the alert rules were obtained. 
+ + Args: + model: a string name of the Juju model + model_uuid: a globally unique string identifier for the Juju model + application: an application name as a string + unit: a unit name as a string + charm_name: name of charm as a string + """ + if not self.is_valid_uuid(model_uuid): + raise InvalidUUIDError(model_uuid) + + self._model = model + self._model_uuid = model_uuid + self._application = application + self._charm_name = charm_name + self._unit = unit + + def is_valid_uuid(self, uuid): + """Validate the supplied UUID against the Juju Model UUID pattern. + + Args: + uuid: string that needs to be checked if it is valid v4 UUID. + + Returns: + True if parameter is a valid v4 UUID, False otherwise. + """ + try: + return str(UUID(uuid, version=4)) == uuid + except (ValueError, TypeError): + return False + + @classmethod + def from_charm(cls, charm): + """Creates a JujuTopology instance by using the model data available on a charm object. + + Args: + charm: a `CharmBase` object for which the `JujuTopology` will be constructed + Returns: + a `JujuTopology` object. + """ + return cls( + model=charm.model.name, + model_uuid=charm.model.uuid, + application=charm.model.app.name, + unit=charm.model.unit.name, + charm_name=charm.meta.name, + ) + + @classmethod + def from_dict(cls, data: dict): + """Factory method for creating `JujuTopology` children from a dictionary. + + Args: + data: a dictionary with five keys providing topology information. The keys are + - "model" + - "model_uuid" + - "application" + - "unit" + - "charm_name" + `unit` and `charm_name` may be empty, but will result in more limited + labels. However, this allows us to support charms without workloads. + + Returns: + a `JujuTopology` object. 
+ """ + return cls( + model=data["model"], + model_uuid=data["model_uuid"], + application=data["application"], + unit=data.get("unit", ""), + charm_name=data.get("charm_name", ""), + ) + + def as_dict( + self, + *, + remapped_keys: Optional[Dict[str, str]] = None, + excluded_keys: Optional[List[str]] = None, + ) -> OrderedDict: + """Format the topology information into an ordered dict. + + Keeping the dictionary ordered is important to be able to + compare dicts without having to resort to deep comparisons. + + Args: + remapped_keys: A dictionary mapping old key names to new key names, + which will be substituted when invoked. + excluded_keys: A list of key names to exclude from the returned dict. + uuid_length: The length to crop the UUID to. + """ + ret = OrderedDict( + [ + ("model", self.model), + ("model_uuid", self.model_uuid), + ("application", self.application), + ("unit", self.unit), + ("charm_name", self.charm_name), + ] + ) + if excluded_keys: + ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) + + if remapped_keys: + ret = OrderedDict( + (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore + ) + + return ret + + @property + def identifier(self) -> str: + """Format the topology information into a terse string. + + This crops the model UUID, making it unsuitable for comparisons against + anything but other identifiers. Mainly to be used as a display name or file + name where long strings might become an issue. 
+ + >>> JujuTopology( \ + model = "a-model", \ + model_uuid = "00000000-0000-4000-8000-000000000000", \ + application = "some-app", \ + unit = "some-app/1" \ + ).identifier + 'a-model_00000000_some-app' + """ + parts = self.as_dict( + excluded_keys=["unit", "charm_name"], + ) + + parts["model_uuid"] = self.model_uuid_short + values = parts.values() + + return "_".join([str(val) for val in values]).replace("/", "_") + + @property + def label_matcher_dict(self) -> Dict[str, str]: + """Format the topology information into a dict with keys having 'juju_' as prefix. + + Relabelled topology never includes the unit as it would then only match + the leader unit (ie. the unit that produced the dict). + """ + items = self.as_dict( + remapped_keys={"charm_name": "charm"}, + excluded_keys=["unit"], + ).items() + + return {"juju_{}".format(key): value for key, value in items if value} + + @property + def label_matchers(self) -> str: + """Format the topology information into a promql/logql label matcher string. + + Topology label matchers should never include the unit as it + would then only match the leader unit (ie. the unit that + produced the matchers). 
+ """ + items = self.label_matcher_dict.items() + return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) + + @property + def model(self) -> str: + """Getter for the juju model value.""" + return self._model + + @property + def model_uuid(self) -> str: + """Getter for the juju model uuid value.""" + return self._model_uuid + + @property + def model_uuid_short(self) -> str: + """Getter for the juju model uuid value, truncated to its first eight characters.""" + return self._model_uuid[:8] + + @property + def application(self) -> str: + """Getter for the juju application value.""" + return self._application + + @property + def charm_name(self) -> Optional[str]: + """Getter for the juju charm name value.""" + return self._charm_name + + @property + def unit(self) -> Optional[str]: + """Getter for the juju unit value.""" + return self._unit diff --git a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index be967686..cac364e3 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -18,6 +18,13 @@ Source code can be found on GitHub at: https://github.com/canonical/prometheus-k8s-operator/tree/main/lib/charms/prometheus_k8s +## Dependencies + +Using this library requires you to fetch the juju_topology library from +[observability-libs](https://charmhub.io/observability-libs/libraries/juju_topology). 
+ +`charmcraft fetch-lib charms.observability_libs.v0.juju_topology` + ## Provider Library Usage This Prometheus charm interacts with its scrape targets using its @@ -336,11 +343,12 @@ def _on_scrape_targets_changed(self, event): from collections import defaultdict from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from urllib.error import HTTPError, URLError from urllib.parse import urlparse +from urllib.request import urlopen import yaml -from cosl import JujuTopology -from cosl.rules import AlertRules +from charms.observability_libs.v0.juju_topology import JujuTopology from ops.charm import CharmBase, RelationRole from ops.framework import ( BoundEvent, @@ -362,9 +370,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 46 - -PYDEPS = ["cosl"] +LIBPATCH = 38 logger = logging.getLogger(__name__) @@ -386,7 +392,6 @@ def _on_scrape_targets_changed(self, event): "basic_auth", "tls_config", "authorization", - "params", } DEFAULT_JOB = { "metrics_path": "/metrics", @@ -521,8 +526,8 @@ def expand_wildcard_targets_into_individual_jobs( # for such a target. Therefore labeling with Juju topology, excluding the # unit name. non_wildcard_static_config["labels"] = { - **topology.label_matcher_dict, **non_wildcard_static_config.get("labels", {}), + **topology.label_matcher_dict, } non_wildcard_static_configs.append(non_wildcard_static_config) @@ -547,9 +552,9 @@ def expand_wildcard_targets_into_individual_jobs( if topology: # Add topology labels modified_static_config["labels"] = { + **modified_static_config.get("labels", {}), **topology.label_matcher_dict, **{"juju_unit": unit_name}, - **modified_static_config.get("labels", {}), } # Instance relabeling for topology should be last in order. 
@@ -605,12 +610,12 @@ def render_alertmanager_static_configs(alertmanagers: List[str]): return { "alertmanagers": [ { - # For https we still do not render a `tls_config` section because - # certs are expected to be made available by the charm via the - # `update-ca-certificates` mechanism. "scheme": scheme, "path_prefix": path_prefix, "static_configs": [{"targets": netlocs}], + # FIXME figure out how to get alertmanager's ca_file into here + # Without this, prom errors: "x509: certificate signed by unknown authority" + "tls_config": {"insecure_skip_verify": True}, } for (scheme, path_prefix), netlocs in paths.items() ] @@ -765,7 +770,7 @@ def _validate_relation_by_interface_and_direction( actual_relation_interface = relation.interface_name if actual_relation_interface != expected_relation_interface: raise RelationInterfaceMismatchError( - relation_name, expected_relation_interface, actual_relation_interface or "None" + relation_name, expected_relation_interface, actual_relation_interface ) if expected_relation_role == RelationRole.provides: @@ -833,6 +838,206 @@ def _is_single_alert_rule_format(rules_dict: dict) -> bool: return set(rules_dict) >= {"alert", "expr"} +class AlertRules: + """Utility class for amalgamating prometheus alert rule files and injecting juju topology. + + An `AlertRules` object supports aggregating alert rules from files and directories in both + official and single rule file formats using the `add_path()` method. All the alert rules + read are annotated with Juju topology labels and amalgamated into a single data structure + in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be + easily dumped into JSON format and exchanged over relation data. The dictionary can also + be dumped into YAML format and written directly into an alert rules file that is read by + Prometheus. 
Note that multiple `AlertRules` objects must not be written into the same file, + since Prometheus allows only a single list of alert rule groups per alert rules file. + + The official Prometheus format is a YAML file conforming to the Prometheus documentation + (https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/). + The custom single rule format is a subsection of the official YAML, having a single alert + rule, effectively "one alert per file". + """ + + # This class uses the following terminology for the various parts of a rule file: + # - alert rules file: the entire groups[] yaml, including the "groups:" key. + # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list + # of dictionaries that have the "name" and "rules" keys. + # - alert group (singular): a single dictionary that has the "name" and "rules" keys. + # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with + # the "alert" and "expr" keys. + # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. + + def __init__(self, topology: Optional[JujuTopology] = None): + """Build an alert rule object. + + Args: + topology: an optional `JujuTopology` instance that is used to annotate all alert rules. + """ + self.topology = topology + self.tool = CosTool(None) + self.alert_groups = [] # type: List[dict] + + def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: + """Read a rules file from path, injecting juju topology. + + Args: + root_path: full path to the root rules folder (used only for generating group name) + file_path: full path to a *.rule file. + + Returns: + A list of dictionaries representing the rules file, if file is valid (the structure is + formed by `yaml.safe_load` of the file); an empty list otherwise. 
+ """ + with file_path.open() as rf: + # Load a list of rules from file then add labels and filters + try: + rule_file = yaml.safe_load(rf) + + except Exception as e: + logger.error("Failed to read alert rules from %s: %s", file_path.name, e) + return [] + + if not rule_file: + logger.warning("Empty rules file: %s", file_path.name) + return [] + if not isinstance(rule_file, dict): + logger.error("Invalid rules file (must be a dict): %s", file_path.name) + return [] + if _is_official_alert_rule_format(rule_file): + alert_groups = rule_file["groups"] + elif _is_single_alert_rule_format(rule_file): + # convert to list of alert groups + # group name is made up from the file name + alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] + else: + # invalid/unsupported + logger.error("Invalid rules file: %s", file_path.name) + return [] + + # update rules with additional metadata + for alert_group in alert_groups: + # update group name with topology and sub-path + alert_group["name"] = self._group_name( + str(root_path), + str(file_path), + alert_group["name"], + ) + + # add "juju_" topology labels + for alert_rule in alert_group["rules"]: + if "labels" not in alert_rule: + alert_rule["labels"] = {} + + if self.topology: + alert_rule["labels"].update(self.topology.label_matcher_dict) + # insert juju topology filters into a prometheus alert rule + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", alert_rule["expr"]), + self.topology.label_matcher_dict, + ) + + return alert_groups + + def _group_name(self, root_path: str, file_path: str, group_name: str) -> str: + """Generate group name from path and topology. + + The group name is made up of the relative path between the root dir_path, the file path, + and topology identifier. + + Args: + root_path: path to the root rules dir. + file_path: path to rule file. 
+ group_name: original group name to keep as part of the new augmented group name + + Returns: + New group name, augmented by juju topology and relative path. + """ + rel_path = os.path.relpath(os.path.dirname(file_path), root_path) + rel_path = "" if rel_path == "." else rel_path.replace(os.path.sep, "_") + + # Generate group name: + # - name, from juju topology + # - suffix, from the relative path of the rule file; + group_name_parts = [self.topology.identifier] if self.topology else [] + group_name_parts.extend([rel_path, group_name, "alerts"]) + # filter to remove empty strings + return "_".join(filter(None, group_name_parts)) + + @classmethod + def _multi_suffix_glob( + cls, dir_path: Path, suffixes: List[str], recursive: bool = True + ) -> list: + """Helper function for getting all files in a directory that have a matching suffix. + + Args: + dir_path: path to the directory to glob from. + suffixes: list of suffixes to include in the glob (items should begin with a period). + recursive: a flag indicating whether a glob is recursive (nested) or not. + + Returns: + List of files in `dir_path` that have one of the suffixes specified in `suffixes`. + """ + all_files_in_dir = dir_path.glob("**/*" if recursive else "*") + return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) + + def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: + """Read all rule files in a directory. + + All rules from files for the same directory are loaded into a single + group. The generated name of this group includes juju topology. + By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. + + Args: + dir_path: directory containing *.rule files (alert rules without groups). + recursive: flag indicating whether to scan for rule files recursively. 
+ + Returns: + a list of dictionaries representing prometheus alert rule groups, each dictionary + representing an alert group (structure determined by `yaml.safe_load`). + """ + alert_groups = [] # type: List[dict] + + # Gather all alerts into a list of groups + for file_path in self._multi_suffix_glob( + dir_path, [".rule", ".rules", ".yml", ".yaml"], recursive + ): + alert_groups_from_file = self._from_file(dir_path, file_path) + if alert_groups_from_file: + logger.debug("Reading alert rule from %s", file_path) + alert_groups.extend(alert_groups_from_file) + + return alert_groups + + def add_path(self, path: str, *, recursive: bool = False) -> None: + """Add rules from a dir path. + + All rules from files are aggregated into a data structure representing a single rule file. + All group names are augmented with juju topology. + + Args: + path: either a rules file or a dir of rules files. + recursive: whether to read files recursively or not (no impact if `path` is a file). + + Returns: + True if path was added else False. + """ + path = Path(path) # type: Path + if path.is_dir(): + self.alert_groups.extend(self._from_dir(path, recursive)) + elif path.is_file(): + self.alert_groups.extend(self._from_file(path.parent, path)) + else: + logger.debug("Alert rules path does not exist: %s", path) + + def as_dict(self) -> dict: + """Return standard alert rules file in dict representation. + + Returns: + a dictionary containing a single list of alert rule groups. + The list of alert rule groups is provided as value of the + "groups" dictionary key. 
+ """ + return {"groups": self.alert_groups} if self.alert_groups else {} + + class TargetsChangedEvent(EventBase): """Event emitted when Prometheus scrape targets change.""" @@ -858,7 +1063,7 @@ class MonitoringEvents(ObjectEvents): class MetricsEndpointConsumer(Object): """A Prometheus based Monitoring service.""" - on = MonitoringEvents() # pyright: ignore + on = MonitoringEvents() def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME): """A Prometheus based Monitoring service. @@ -1015,6 +1220,7 @@ def alerts(self) -> dict: try: scrape_metadata = json.loads(relation.data[relation.app]["scrape_metadata"]) identifier = JujuTopology.from_dict(scrape_metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore except KeyError as e: logger.debug( @@ -1029,10 +1235,6 @@ def alerts(self) -> dict: ) continue - # We need to append the relation info to the identifier. This is to allow for cases for there are two - # relations which eventually scrape the same application. Issue #551. - identifier = f"{identifier}_{relation.name}_{relation.id}" - alerts[identifier] = alert_rules _, errmsg = self._tool.validate_alert_rules(alert_rules) @@ -1126,7 +1328,7 @@ def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: # Inject topology and put it back in the list rule["expr"] = self._tool.inject_label_matchers( re.sub(r"%%juju_topology%%,?", "", rule["expr"]), - topology.alert_expression_dict, + topology.label_matcher_dict, ) except KeyError: # Some required JujuTopology key is missing. Just move on. @@ -1180,8 +1382,16 @@ def _static_scrape_config(self, relation) -> list: scrape_configs, hosts, topology ) - # For https scrape targets we still do not render a `tls_config` section because certs - # are expected to be made available by the charm via the `update-ca-certificates` mechanism. 
+ # If scheme is https but no ca section present, then auto add "insecure_skip_verify", + # otherwise scraping errors out with "x509: certificate signed by unknown authority". + # https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config + for scrape_config in scrape_configs: + tls_config = scrape_config.get("tls_config", {}) + ca_present = "ca" in tls_config or "ca_file" in tls_config + if scrape_config.get("scheme") == "https" and not ca_present: + tls_config["insecure_skip_verify"] = True + scrape_config["tls_config"] = tls_config + return scrape_configs def _relation_hosts(self, relation: Relation) -> Dict[str, Tuple[str, str]]: @@ -1298,7 +1508,7 @@ def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> st class MetricsEndpointProvider(Object): """A metrics endpoint for Prometheus.""" - on = MetricsEndpointProviderEvents() # pyright: ignore + on = MetricsEndpointProviderEvents() def __init__( self, @@ -1529,7 +1739,7 @@ def set_scrape_job_spec(self, _=None): if not self._charm.unit.is_leader(): return - alert_rules = AlertRules(query_type="promql", topology=self.topology) + alert_rules = AlertRules(topology=self.topology) alert_rules.add_path(self._alert_rules_path, recursive=True) alert_rules_as_dict = alert_rules.as_dict() @@ -1537,11 +1747,12 @@ def set_scrape_job_spec(self, _=None): relation.data[self._charm.app]["scrape_metadata"] = json.dumps(self._scrape_metadata) relation.data[self._charm.app]["scrape_jobs"] = json.dumps(self._scrape_jobs) - # Update relation data with the string representation of the rule file. - # Juju topology is already included in the "scrape_metadata" field above. - # The consumer side of the relation uses this information to name the rules file - # that is written to the filesystem. - relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict) + if alert_rules_as_dict: + # Update relation data with the string representation of the rule file. 
+ # Juju topology is already included in the "scrape_metadata" field above. + # The consumer side of the relation uses this information to name the rules file + # that is written to the filesystem. + relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict) def _set_unit_ip(self, _=None): """Set unit host address. @@ -1675,7 +1886,7 @@ def _update_relation_data(self, _): if not self._charm.unit.is_leader(): return - alert_rules = AlertRules(query_type="promql") + alert_rules = AlertRules() alert_rules.add_path(self.dir_path, recursive=self._recursive) alert_rules_as_dict = alert_rules.as_dict() @@ -1839,16 +2050,14 @@ def _set_prometheus_data(self, event): return jobs = [] + _type_convert_stored( - self._stored.jobs # pyright: ignore + self._stored.jobs ) # list of scrape jobs, one per relation for relation in self.model.relations[self._target_relation]: targets = self._get_targets(relation) if targets and relation.app: jobs.append(self._static_scrape_job(targets, relation.app.name)) - groups = [] + _type_convert_stored( - self._stored.alert_rules # pyright: ignore - ) # list of alert rule groups + groups = [] + _type_convert_stored(self._stored.alert_rules) # list of alert rule groups for relation in self.model.relations[self._alert_rules_relation]: unit_rules = self._get_alert_rules(relation) if unit_rules and relation.app: @@ -1900,7 +2109,7 @@ def set_target_job_data(self, targets: dict, app_name: str, **kwargs) -> None: jobs.append(updated_job) relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) - if not _type_convert_stored(self._stored.jobs) == jobs: # pyright: ignore + if not _type_convert_stored(self._stored.jobs) == jobs: self._stored.jobs = jobs def _on_prometheus_targets_departed(self, event): @@ -1952,7 +2161,7 @@ def remove_prometheus_jobs(self, job_name: str, unit_name: Optional[str] = ""): relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs) - if not _type_convert_stored(self._stored.jobs) == jobs: # 
pyright: ignore + if not _type_convert_stored(self._stored.jobs) == jobs: self._stored.jobs = jobs def _job_name(self, appname) -> str: @@ -2057,7 +2266,16 @@ def _static_config_extra_labels(self, target: Dict[str, str]) -> Dict[str, str]: logger.debug("Could not perform DNS lookup for %s", target["hostname"]) dns_name = target["hostname"] extra_info["dns_name"] = dns_name + label_re = re.compile(r'(?P