Skip to content

Commit

Permalink
Enforce scale limitations
Browse files Browse the repository at this point in the history
Fixes #948

Signed-off-by: Gil Bregman <[email protected]>
  • Loading branch information
gbregman committed Nov 15, 2024
1 parent 0980145 commit 2a26ee9
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 32 deletions.
3 changes: 3 additions & 0 deletions ceph-nvmeof.conf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ enable_spdk_discovery_controller = False
#spdk_ping_interval_in_seconds = 2.0
#max_hosts_per_namespace = 1
#max_namespaces_with_netmask = 1000
#max_subsystems = 128
#max_namespaces = 1024
#max_hosts_per_subsystem = 32

[gateway-logs]
log_level=debug
Expand Down
10 changes: 7 additions & 3 deletions control/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,12 @@ def gw_info(self, args):
out_func(f"Gateway's load balancing group: {gw_info.load_balancing_group}")
out_func(f"Gateway's address: {gw_info.addr}")
out_func(f"Gateway's port: {gw_info.port}")
if gw_info.max_subsystems:
out_func(f"Gateway's max subsystems: {gw_info.max_subsystems}")
if gw_info.max_namespaces:
out_func(f"Gateway's max namespaces: {gw_info.max_namespaces}")
if gw_info.max_hosts_per_subsystem:
out_func(f"Gateway's max hosts per subsystem: {gw_info.max_hosts_per_subsystem}")
if gw_info.spdk_version:
out_func(f"SPDK version: {gw_info.spdk_version}")
if not gw_info.bool_status:
Expand Down Expand Up @@ -663,9 +669,7 @@ def subsystem_add(self, args):
"""Create a subsystem"""

out_func, err_func = self.get_output_functions(args)
if args.max_namespaces == None:
args.max_namespaces = 256
if args.max_namespaces <= 0:
if args.max_namespaces != None and args.max_namespaces <= 0:
self.cli.parser.error("--max-namespaces value must be positive")
if args.subsystem == GatewayUtils.DISCOVERY_NQN:
self.cli.parser.error("Can't add a discovery subsystem")
Expand Down
96 changes: 80 additions & 16 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,14 @@ def __init__(self):
self.subsys_dhchap_key = defaultdict(dict)
self.host_dhchap_key = defaultdict(dict)
self.host_psk_key = defaultdict(dict)
self.host_nqn = defaultdict(dict)

def clean_subsystem(self, subsys):
    """Drop every per-subsystem record held for ``subsys``.

    Removes the ``subsys`` entry from ``host_psk_key``, ``host_dhchap_key``,
    ``subsys_allow_any_hosts``, ``subsys_dhchap_key`` and ``host_nqn``.
    A table that has no entry for ``subsys`` is left untouched.
    """
    # pop() with a default never raises, so cleaning an unknown or
    # partially populated subsystem is a harmless no-op.
    self.host_psk_key.pop(subsys, None)
    self.host_dhchap_key.pop(subsys, None)
    self.subsys_allow_any_hosts.pop(subsys, None)
    self.subsys_dhchap_key.pop(subsys, None)
    self.host_nqn.pop(subsys, None)

def add_psk_host(self, subsys, host, key):
if key:
Expand Down Expand Up @@ -122,6 +124,24 @@ def get_hosts_with_dhchap_key(self, subsys):
return self.host_dhchap_key[subsys]
return {}

def add_host_nqn(self, subsys, hostnqn):
    """Record ``hostnqn`` in the host list kept for ``subsys``.

    The list for ``subsys`` is created on first use. An NQN that is already
    recorded is not appended again, so the length of the list (see
    ``get_host_count``) reflects distinct hosts only.
    """
    # setdefault() rather than plain indexing: the container is created as
    # defaultdict(dict), so indexing a missing key would insert a dict
    # instead of the list this method needs.
    hosts = self.host_nqn.setdefault(subsys, [])
    if hostnqn not in hosts:
        hosts.append(hostnqn)

def remove_host_nqn(self, subsys, hostnqn):
    """Remove ``hostnqn`` from the host list kept for ``subsys``.

    Does nothing when ``subsys`` has no host list or when ``hostnqn`` is not
    in it; neither case is treated as an error.
    """
    # .get() never inserts, so asking about an unknown subsystem does not
    # leave an empty entry behind in the defaultdict.
    hosts = self.host_nqn.get(subsys)
    if hosts is None:
        return
    try:
        hosts.remove(hostnqn)
    except ValueError:
        # Deliberate best effort: the host was never recorded.
        pass

def get_host_count(self, subsys):
    """Return how many host NQNs are recorded for ``subsys``.

    Returns 0 when nothing has been recorded for ``subsys``.
    """
    # .get() with an empty default avoids inserting a key into the
    # defaultdict just because somebody asked for a count.
    return len(self.host_nqn.get(subsys, ()))

def allow_any_host(self, subsys):
    """Flag ``subsys`` in ``subsys_allow_any_hosts`` as open to any host."""
    self.subsys_allow_any_hosts[subsys] = True

Expand Down Expand Up @@ -157,6 +177,9 @@ def __init__(self, nsid, bdev, uuid, anagrpid, no_auto_visible):
self.anagrpid = anagrpid
self.host_list = []

def __str__(self):
    """Return a one-line, human-readable summary of this namespace.

    The summary lists nsid, bdev, uuid, no_auto_visible, anagrpid and the
    host list, in that order.
    """
    return f"nsid: {self.nsid}, bdev: {self.bdev}, uuid: {self.uuid}, no_auto_visible: {self.no_auto_visible}, anagrpid: {self.anagrpid}, hosts: {self.host_list}"

def empty(self) -> bool:
if self.bdev or self.uuid:
return False
Expand Down Expand Up @@ -220,20 +243,26 @@ def find_namespace(self, nqn, nsid, uuid = None) -> NamespaceInfo:
return NamespacesLocalList.EMPTY_NAMESPACE

def get_namespace_count(self, nqn, no_auto_visible = None, min_hosts = 0) -> int:
    """Count the non-empty namespaces that match the given filters.

    Args:
        nqn: Subsystem to count in. A falsy value (``None`` or ``""``)
            counts across every subsystem in ``namespace_list``.
        no_auto_visible: When not ``None``, only namespaces whose
            ``no_auto_visible`` attribute equals this value are counted.
        min_hosts: Only namespaces whose ``host_count()`` is at least this
            value are counted.

    Returns:
        The number of matching namespaces; 0 when ``nqn`` is given but has
        no entry in ``namespace_list``.
    """
    if nqn:
        # Membership test first so an unknown subsystem is never indexed.
        if nqn not in self.namespace_list:
            return 0
        subsystems = [nqn]
    else:
        subsystems = list(self.namespace_list)

    ns_count = 0
    for one_subsys in subsystems:
        for ns in self.namespace_list[one_subsys].values():
            if ns.empty():
                continue
            if no_auto_visible is not None and ns.no_auto_visible != no_auto_visible:
                continue
            if ns.host_count() >= min_hosts:
                ns_count += 1

    return ns_count

Expand Down Expand Up @@ -336,6 +365,15 @@ def __init__(self, config: GatewayConfig, gateway_state: GatewayStateHandler, rp
self.gateway_group = self.config.get_with_default("gateway", "group", "")
self.max_hosts_per_namespace = self.config.getint_with_default("gateway", "max_hosts_per_namespace", 1)
self.max_namespaces_with_netmask = self.config.getint_with_default("gateway", "max_namespaces_with_netmask", 1000)
self.max_subsystems = self.config.getint_with_default("gateway", "max_subsystems", 128)
if self.max_subsystems <= 0:
self.max_subsystems = 128
self.max_namespaces = self.config.getint_with_default("gateway", "max_namespaces", 1024)
if self.max_namespaces <= 0:
self.max_namespaces = 1024
self.max_hosts_per_subsystem = self.config.getint_with_default("gateway", "max_hosts_per_subsystem", 32)
if self.max_hosts_per_subsystem <= 0:
self.max_hosts_per_subsystem = 32
self.gateway_pool = self.config.get_with_default("ceph", "pool", "")
self.ana_map = defaultdict(dict)
self.cluster_nonce = {}
Expand Down Expand Up @@ -885,6 +923,12 @@ def create_subsystem_safe(self, request, context):
self.logger.error(f"{errmsg}")
return pb2.subsys_status(status = errno.EINVAL, error_message = errmsg, nqn = request.subsystem_nqn)

if not request.max_namespaces:
request.max_namespaces = self.max_namespaces
else:
if request.max_namespaces > self.max_namespaces:
self.logger.warning(f"The requested max number of namespaces for subsystem {request.subsystem_nqn} ({request.max_namespaces}) is greater than the global limit on the number of namespaces ({self.max_namespaces}), will continue")

errmsg = ""
if not GatewayState.is_key_element_valid(request.subsystem_nqn):
errmsg = f"{create_subsystem_error_prefix}: Invalid NQN \"{request.subsystem_nqn}\", contains invalid characters"
Expand All @@ -903,6 +947,11 @@ def create_subsystem_safe(self, request, context):
self.logger.error(f"{errmsg}")
return pb2.subsys_status(status = errno.EINVAL, error_message = errmsg, nqn = request.subsystem_nqn)

if len(self.subsys_max_ns) >= self.max_subsystems:
errmsg = f"{create_subsystem_error_prefix}: Maximal number of subsystems ({self.max_subsystems}) has already been reached"
self.logger.error(f"{errmsg}")
return pb2.subsys_status(status = errno.E2BIG, error_message = errmsg, nqn = request.subsystem_nqn)

if context:
if request.no_group_append or not self.gateway_group:
self.logger.info(f"Subsystem NQN will not be changed")
Expand Down Expand Up @@ -949,7 +998,7 @@ def create_subsystem_safe(self, request, context):
max_cntlid=max_cntlid,
ana_reporting = True,
)
self.subsys_max_ns[request.subsystem_nqn] = request.max_namespaces if request.max_namespaces else 32
self.subsys_max_ns[request.subsystem_nqn] = request.max_namespaces
if request.dhchap_key:
self.host_info.add_dhchap_key_to_subsystem(request.subsystem_nqn, request.dhchap_key)
self.logger.debug(f"create_subsystem {request.subsystem_nqn}: {ret}")
Expand Down Expand Up @@ -1172,18 +1221,23 @@ def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid, no_au

if no_auto_visible and self.subsystem_nsid_bdev_and_uuid.get_namespace_count(subsystem_nqn,
True, 0) >= self.max_namespaces_with_netmask:
errmsg = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}: maximal number of namespaces which are not auto visible ({self.max_namespaces_with_netmask}) has already been reached"
errmsg = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}: Maximal number of namespaces which are not auto visible ({self.max_namespaces_with_netmask}) has already been reached"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.E2BIG, error_message=errmsg)

if nsid and nsid > self.subsys_max_ns[subsystem_nqn]:
errmsg = f"Failure adding namespace to {subsystem_nqn}: requested NSID {nsid} is bigger than the maximal one ({self.subsys_max_ns[subsystem_nqn]})"
errmsg = f"Failure adding namespace to {subsystem_nqn}: Requested NSID {nsid} is bigger than the maximal one ({self.subsys_max_ns[subsystem_nqn]})"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.E2BIG, error_message=errmsg)

if not nsid and self.subsystem_nsid_bdev_and_uuid.get_namespace_count(subsystem_nqn,
False, 0) >= self.subsys_max_ns[subsystem_nqn]:
errmsg = f"Failure adding namespace to {subsystem_nqn}: maximal number of namespaces ({self.subsys_max_ns[subsystem_nqn]}) has already been reached"
None, 0) >= self.subsys_max_ns[subsystem_nqn]:
errmsg = f"Failure adding namespace to {subsystem_nqn}: Subsystem's maximal number of namespaces ({self.subsys_max_ns[subsystem_nqn]}) has already been reached"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.E2BIG, error_message=errmsg)

if self.subsystem_nsid_bdev_and_uuid.get_namespace_count(None, None, 0) >= self.max_namespaces:
errmsg = f"Failure adding namespace to {subsystem_nqn}: Maximal number of namespaces ({self.max_namespaces}) has already been reached"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.E2BIG, error_message=errmsg)

Expand Down Expand Up @@ -2411,6 +2465,11 @@ def add_host_safe(self, request, context):
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.EEXIST, error_message=errmsg)

if request.host_nqn != "*" and self.host_info.get_host_count(request.subsystem_nqn) >= self.max_hosts_per_subsystem:
errmsg = f"{host_failure_prefix}: Maximal number of hosts for subsystem ({self.max_hosts_per_subsystem}) has already been reached"
self.logger.error(f"{errmsg}")
return pb2.subsys_status(status = errno.E2BIG, error_message = errmsg, nqn = request.subsystem_nqn)

dhchap_ctrlr_key = self.host_info.get_subsystem_dhchap_key(request.subsystem_nqn)
if dhchap_ctrlr_key:
self.logger.info(f"Got DHCHAP key {dhchap_ctrlr_key} for subsystem {request.subsystem_nqn}")
Expand Down Expand Up @@ -2486,6 +2545,7 @@ def add_host_safe(self, request, context):
pass
if dhchap_file:
self.host_info.add_dhchap_host(request.subsystem_nqn, request.host_nqn, request.dhchap_key)
self.host_info.add_host_nqn(request.subsystem_nqn, request.host_nqn)
except Exception as ex:
if request.host_nqn == "*":
self.logger.exception(all_host_failure_prefix)
Expand Down Expand Up @@ -2609,6 +2669,7 @@ def remove_host_safe(self, request, context):
self.host_info.remove_dhchap_host(request.subsystem_nqn, request.host_nqn)
self.remove_all_host_key_files(request.subsystem_nqn, request.host_nqn)
self.remove_all_host_keys_from_keyring(request.subsystem_nqn, request.host_nqn)
self.host_info.remove_host_nqn(request.subsystem_nqn, request.host_nqn)
except Exception as ex:
if request.host_nqn == "*":
self.logger.exception(all_host_failure_prefix)
Expand Down Expand Up @@ -3679,6 +3740,9 @@ def get_gateway_info_safe(self, request, context):
load_balancing_group = self.group_id + 1,
bool_status = True,
hostname = self.host_name,
max_subsystems = self.max_subsystems,
max_namespaces = self.max_namespaces,
max_hosts_per_subsystem = self.max_hosts_per_subsystem,
status = 0,
error_message = os.strerror(0))
cli_ver = self.parse_version(cli_version_string)
Expand Down
3 changes: 3 additions & 0 deletions control/proto/gateway.proto
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,9 @@ message gateway_info {
optional string spdk_version = 10;
uint32 load_balancing_group = 11;
string hostname = 12;
optional uint32 max_subsystems = 13;
optional uint32 max_namespaces = 14;
optional uint32 max_hosts_per_subsystem = 15;
}

message cli_version {
Expand Down
Loading

0 comments on commit 2a26ee9

Please sign in to comment.