Skip to content

Commit

Permalink
Redesign to comply with fair rebalance per ANA group and subsystem
Browse files Browse the repository at this point in the history
Works for 3 GWs and 3 subsystems. Only 1 GW at a time performs rebalance.
Added test for GitHub, merged again.
Accelerate rebalance when several ANA groups are in Optimized state on some GW.
Added protection for shared data.

Signed-off-by: Leonid Chernin <[email protected]>
  • Loading branch information
Leonid Chernin committed Nov 14, 2024
1 parent 0c31e3d commit 91b5b35
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 87 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ jobs:
strategy:
fail-fast: false
matrix:
test: ["sanity", "ns_lb_change", "no_subsystems", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit"]
test: ["sanity", "ns_lb_change", "no_subsystems", "auto_load_balance", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit"]
runs-on: ubuntu-latest
env:
HUGEPAGES: 1024 # 4 spdk instances
Expand Down
4 changes: 2 additions & 2 deletions ceph-nvmeof.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ state_update_notify = True
state_update_timeout_in_msec = 2000
state_update_interval_sec = 5
enable_spdk_discovery_controller = False
rebalance_period_sec = 12
max_ns_to_change_lb_grp = 4
rebalance_period_sec = 10
max_ns_to_change_lb_grp = 7
#omap_file_lock_duration = 20
#omap_file_lock_retries = 30
#omap_file_lock_retry_sleep_interval = 1.0
Expand Down
9 changes: 9 additions & 0 deletions control/cephutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import rados
import time
import json
import re
from .utils import GatewayLogger

class CephUtils:
Expand All @@ -25,6 +26,7 @@ def __init__(self, config):
self.rados_id = config.get_with_default("ceph", "id", "")
self.anagroup_list = []
self.rebalance_supported = False
self.rebalance_ana_group = 0
self.last_sent = time.time()

def execute_ceph_monitor_command(self, cmd):
Expand Down Expand Up @@ -54,6 +56,9 @@ def get_gw_id_owner_ana_group(self, pool, group, anagrp):
# Report the cached rebalance-capability flag without querying anything.
# The flag starts as False in __init__ and is refreshed by
# get_number_created_gateways(), which sets it to True when the string it
# inspects (conv_str) contains '"LB"' and to False otherwise.
def is_rebalance_supported(self):
return self.rebalance_supported

# Return the cached rebalance ANA group number without querying anything.
# The value is initialised to 0 in __init__ and is overwritten by
# get_number_created_gateways() with the integer captured by the
# "rebalance_ana_group" regex, but only when '"LB"' was found and the
# pattern matched.
# NOTE(review): 0 presumably means "no group reported yet" - confirm with callers.
def get_rebalance_ana_group(self):
return self.rebalance_ana_group

def get_number_created_gateways(self, pool, group):
now = time.time()
if (now - self.last_sent) < 10 and self.anagroup_list :
Expand All @@ -71,6 +76,10 @@ def get_number_created_gateways(self, pool, group):
pos = conv_str.find('"LB"')
if pos != -1:
self.rebalance_supported = True
match = re.search(r'"rebalance_ana_group":\s*(\d+)', conv_str)
if match:
self.rebalance_ana_group = int(match.group(1))
self.logger.debug(f"Rebalance ana_group: {self.rebalance_ana_group}")
else :
self.rebalance_supported = False
pos = conv_str.find("[")
Expand Down
Loading

0 comments on commit 91b5b35

Please sign in to comment.