Skip to content

Commit

Permalink
Speed up bootstrap (#413)
Browse files Browse the repository at this point in the history
* Speed up bootstrap
* Add unit tests
* Sync poetry.lock from main
* Add install and active log messages and set primary status message earlier
* Enable and fix existing unit test

Signed-off-by: Marcelo Henrique Neppel <[email protected]>
  • Loading branch information
marceloneppel authored Jun 25, 2024
1 parent dbd46b5 commit 165d04c
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 26 deletions.
7 changes: 5 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import platform
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Literal, Optional, Set, get_args

Expand Down Expand Up @@ -352,7 +353,7 @@ def primary_endpoint(self) -> Optional[str]:
logger.debug("primary endpoint early exit: Peer relation not joined yet.")
return None
try:
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)):
with attempt:
primary = self._patroni.get_primary()
if primary is None and (standby_leader := self._patroni.get_standby_leader()):
Expand Down Expand Up @@ -855,6 +856,7 @@ def _on_cluster_topology_change(self, _):

def _on_install(self, event: InstallEvent) -> None:
"""Install prerequisites for the application."""
logger.debug("Install start time: %s", datetime.now())
if not self._is_storage_attached():
self._reboot_on_detached_storage(event)
return
Expand Down Expand Up @@ -1162,7 +1164,8 @@ def _start_primary(self, event: StartEvent) -> None:
# was fully initialised.
self.enable_disable_extensions()

self.unit.status = ActiveStatus()
logger.debug("Active workload time: %s", datetime.now())
self._set_primary_status_message()

def _start_replica(self, event) -> None:
"""Configure the replica if the cluster was already initialised."""
Expand Down
2 changes: 1 addition & 1 deletion src/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def are_all_members_ready(self) -> bool:

def get_patroni_health(self) -> Dict[str, str]:
"""Gets, retires and parses the Patroni health endpoint."""
for attempt in Retrying(stop=stop_after_delay(90), wait=wait_fixed(3)):
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(7)):
with attempt:
r = requests.get(
f"{self._patroni_url}/health",
Expand Down
6 changes: 4 additions & 2 deletions src/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,10 @@ def _on_upgrade_charm_check_legacy(self) -> None:

peers_state = list(filter(lambda state: state != "", self.unit_states))

if len(peers_state) == len(self.peer_relation.units) and (
set(peers_state) == {"ready"} or len(peers_state) == 0
if (
len(peers_state) == len(self.peer_relation.units)
and (set(peers_state) == {"ready"} or len(peers_state) == 0)
and self.charm.is_cluster_initialised
):
if self.charm._patroni.member_started:
# All peers have set the state to ready
Expand Down
60 changes: 39 additions & 21 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import platform
import subprocess
from unittest import TestCase
from unittest.mock import MagicMock, Mock, PropertyMock, call, mock_open, patch, sentinel

import pytest
Expand Down Expand Up @@ -37,6 +38,9 @@

CREATE_CLUSTER_CONF_PATH = "/etc/postgresql-common/createcluster.d/pgcharm.conf"

# used for assert functions
tc = TestCase()


@pytest.fixture(autouse=True)
def harness():
Expand Down Expand Up @@ -165,7 +169,9 @@ def test_patroni_scrape_config_tls(harness):


def test_primary_endpoint(harness):
with patch(
with patch("charm.stop_after_delay", new_callable=PropertyMock) as _stop_after_delay, patch(
"charm.wait_fixed", new_callable=PropertyMock
) as _wait_fixed, patch(
"charm.PostgresqlOperatorCharm._units_ips",
new_callable=PropertyMock,
return_value={"1.1.1.1", "1.1.1.2"},
Expand All @@ -174,6 +180,10 @@ def test_primary_endpoint(harness):
_patroni.return_value.get_primary.return_value = sentinel.primary
assert harness.charm.primary_endpoint == "1.1.1.1"

# Check needed to ensure a fast charm deployment.
_stop_after_delay.assert_called_once_with(5)
_wait_fixed.assert_called_once_with(3)

_patroni.return_value.get_member_ip.assert_called_once_with(sentinel.primary)
_patroni.return_value.get_primary.assert_called_once_with()

Expand Down Expand Up @@ -547,6 +557,9 @@ def test_enable_disable_extensions(harness, caplog):
@patch_network_get(private_address="1.1.1.1")
def test_on_start(harness):
with (
patch(
"charm.PostgresqlOperatorCharm._set_primary_status_message"
) as _set_primary_status_message,
patch(
"charm.PostgresqlOperatorCharm.enable_disable_extensions"
) as _enable_disable_extensions,
Expand Down Expand Up @@ -622,7 +635,7 @@ def test_on_start(harness):
assert _postgresql.create_user.call_count == 4 # Considering the previous failed call.
_oversee_users.assert_called_once()
_enable_disable_extensions.assert_called_once()
assert isinstance(harness.model.unit.status, ActiveStatus)
_set_primary_status_message.assert_called_once()


@patch_network_get(private_address="1.1.1.1")
Expand Down Expand Up @@ -2256,16 +2269,21 @@ def test_update_new_unit_status(harness):
handle_read_only_mode.assert_not_called()
assert isinstance(harness.charm.unit.status, WaitingStatus)

@patch("charm.Patroni.member_started", new_callable=PropertyMock)
@patch("charm.PostgresqlOperatorCharm.is_standby_leader", new_callable=PropertyMock)
@patch("charm.Patroni.get_primary")
def test_set_active_status(self, _get_primary, _is_standby_leader, _member_started):

def test_set_primary_status_message(harness):
with (
patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started,
patch(
"charm.PostgresqlOperatorCharm.is_standby_leader", new_callable=PropertyMock
) as _is_standby_leader,
patch("charm.Patroni.get_primary") as _get_primary,
):
for values in itertools.product(
[
RetryError(last_attempt=1),
ConnectionError,
self.charm.unit.name,
f"{self.charm.app.name}/2",
harness.charm.unit.name,
f"{harness.charm.app.name}/2",
],
[
RetryError(last_attempt=1),
Expand All @@ -2275,34 +2293,34 @@ def test_set_active_status(self, _get_primary, _is_standby_leader, _member_start
],
[True, False],
):
self.charm.unit.status = MaintenanceStatus("fake status")
harness.charm.unit.status = MaintenanceStatus("fake status")
_member_started.return_value = values[2]
if isinstance(values[0], str):
_get_primary.side_effect = None
_get_primary.return_value = values[0]
if values[0] != self.charm.unit.name and not isinstance(values[1], bool):
if values[0] != harness.charm.unit.name and not isinstance(values[1], bool):
_is_standby_leader.side_effect = values[1]
_is_standby_leader.return_value = None
self.charm._set_active_status()
self.assertIsInstance(self.charm.unit.status, MaintenanceStatus)
harness.charm._set_primary_status_message()
tc.assertIsInstance(harness.charm.unit.status, MaintenanceStatus)
else:
_is_standby_leader.side_effect = None
_is_standby_leader.return_value = values[1]
self.charm._set_active_status()
self.assertIsInstance(
self.charm.unit.status,
harness.charm._set_primary_status_message()
tc.assertIsInstance(
harness.charm.unit.status,
ActiveStatus
if values[0] == self.charm.unit.name or values[1] or values[2]
if values[0] == harness.charm.unit.name or values[1] or values[2]
else MaintenanceStatus,
)
self.assertEqual(
self.charm.unit.status.message,
tc.assertEqual(
harness.charm.unit.status.message,
"Primary"
if values[0] == self.charm.unit.name
if values[0] == harness.charm.unit.name
else ("Standby" if values[1] else ("" if values[2] else "fake status")),
)
else:
_get_primary.side_effect = values[0]
_get_primary.return_value = None
self.charm._set_active_status()
self.assertIsInstance(self.charm.unit.status, MaintenanceStatus)
harness.charm._set_primary_status_message()
tc.assertIsInstance(harness.charm.unit.status, MaintenanceStatus)
23 changes: 23 additions & 0 deletions tests/unit/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def json(self):
"http://server1/cluster": {
"members": [{"name": "postgresql-0", "host": "1.1.1.1", "role": "leader", "lag": "1"}]
},
"http://server1/health": {"state": "running"},
"http://server4/cluster": {"members": []},
}
if args[0] in data:
Expand Down Expand Up @@ -128,6 +129,28 @@ def test_get_member_ip(peers_ips, patroni):
tc.assertIsNone(ip)


def test_get_patroni_health(peers_ips, patroni):
with patch("cluster.stop_after_delay", new_callable=PropertyMock) as _stop_after_delay, patch(
"cluster.wait_fixed", new_callable=PropertyMock
) as _wait_fixed, patch(
"charm.Patroni._patroni_url", new_callable=PropertyMock
) as _patroni_url, patch("requests.get", side_effect=mocked_requests_get) as _get:
# Test when the Patroni API is reachable.
_patroni_url.return_value = "http://server1"
health = patroni.get_patroni_health()

# Check needed to ensure a fast charm deployment.
_stop_after_delay.assert_called_once_with(60)
_wait_fixed.assert_called_once_with(7)

tc.assertEqual(health, {"state": "running"})

# Test when the Patroni API is not reachable.
_patroni_url.return_value = "http://server2"
with tc.assertRaises(tenacity.RetryError):
patroni.get_patroni_health()


def test_get_postgresql_version(peers_ips, patroni):
with patch("charm.snap.SnapClient") as _snap_client:
# TODO test a real implementation
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,41 @@ def test_log_rollback(harness):
)


@pytest.mark.parametrize(
"unit_states,is_cluster_initialised,call",
[
(["ready"], False, False),
(["ready", "ready"], True, False),
(["idle"], False, False),
(["idle"], True, False),
(["ready"], True, True),
],
)
def test_on_upgrade_charm_check_legacy(harness, unit_states, is_cluster_initialised, call):
with (
patch(
"charms.data_platform_libs.v0.upgrade.DataUpgrade.state",
new_callable=PropertyMock(return_value=None),
) as _state,
patch(
"charms.data_platform_libs.v0.upgrade.DataUpgrade.unit_states",
new_callable=PropertyMock(return_value=unit_states),
) as _unit_states,
patch(
"charm.PostgresqlOperatorCharm.is_cluster_initialised",
new_callable=PropertyMock(return_value=is_cluster_initialised),
) as _is_cluster_initialised,
patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started,
patch(
"upgrade.PostgreSQLUpgrade._prepare_upgrade_from_legacy"
) as _prepare_upgrade_from_legacy,
):
with harness.hooks_disabled():
harness.set_leader(True)
harness.charm.upgrade._on_upgrade_charm_check_legacy()
_member_started.assert_called_once() if call else _member_started.assert_not_called()


@patch_network_get(private_address="1.1.1.1")
def test_on_upgrade_granted(harness):
with (
Expand Down

0 comments on commit 165d04c

Please sign in to comment.