From f262484322ef480e30507def6ba2c6850f33e0c5 Mon Sep 17 00:00:00 2001 From: Gil Bregman Date: Fri, 15 Nov 2024 00:32:33 +0200 Subject: [PATCH] Enforce scale limitations Fixes #948 Signed-off-by: Gil Bregman --- ceph-nvmeof.conf | 3 ++ control/cli.py | 10 ++-- control/grpc.py | 91 ++++++++++++++++++++++++++++++------- control/proto/gateway.proto | 3 ++ tests/test_cli.py | 72 +++++++++++++++++++++++------ 5 files changed, 147 insertions(+), 32 deletions(-) diff --git a/ceph-nvmeof.conf b/ceph-nvmeof.conf index 95abf43d..1539e308 100644 --- a/ceph-nvmeof.conf +++ b/ceph-nvmeof.conf @@ -31,6 +31,9 @@ enable_spdk_discovery_controller = False #spdk_ping_interval_in_seconds = 2.0 #max_hosts_per_namespace = 1 #max_namespaces_with_netmask = 1000 +#max_subsystems = 128 +#max_namespaces = 1024 +#max_hosts_per_subsystem = 32 [gateway-logs] log_level=debug diff --git a/control/cli.py b/control/cli.py index a5813bc5..2eded857 100644 --- a/control/cli.py +++ b/control/cli.py @@ -357,6 +357,12 @@ def gw_info(self, args): out_func(f"Gateway's load balancing group: {gw_info.load_balancing_group}") out_func(f"Gateway's address: {gw_info.addr}") out_func(f"Gateway's port: {gw_info.port}") + if gw_info.max_subsystems: + out_func(f"Gateway's max subsystems: {gw_info.max_subsystems}") + if gw_info.max_namespaces: + out_func(f"Gateway's max namespaces: {gw_info.max_namespaces}") + if gw_info.max_hosts_per_subsystem: + out_func(f"Gateway's max hosts per subsystem: {gw_info.max_hosts_per_subsystem}") if gw_info.spdk_version: out_func(f"SPDK version: {gw_info.spdk_version}") if not gw_info.bool_status: @@ -663,9 +669,7 @@ def subsystem_add(self, args): """Create a subsystem""" out_func, err_func = self.get_output_functions(args) - if args.max_namespaces == None: - args.max_namespaces = 256 - if args.max_namespaces <= 0: + if args.max_namespaces != None and args.max_namespaces <= 0: self.cli.parser.error("--max-namespaces value must be positive") if args.subsystem == GatewayUtils.DISCOVERY_NQN: self.cli.parser.error("Can't add a discovery subsystem") diff --git a/control/grpc.py b/control/grpc.py index 81bf52c4..8d8643f1 100644 --- a/control/grpc.py +++ b/control/grpc.py @@ -70,12 +70,14 @@ def __init__(self): self.subsys_dhchap_key = defaultdict(dict) self.host_dhchap_key = defaultdict(dict) self.host_psk_key = defaultdict(dict) + self.host_nqn = defaultdict(set) def clean_subsystem(self, subsys): self.host_psk_key.pop(subsys, None) self.host_dhchap_key.pop(subsys, None) self.subsys_allow_any_hosts.pop(subsys, None) self.subsys_dhchap_key.pop(subsys, None) + self.host_nqn.pop(subsys, None) def add_psk_host(self, subsys, host, key): if key: @@ -122,6 +124,19 @@ def get_hosts_with_dhchap_key(self, subsys): return self.host_dhchap_key[subsys] return {} + def add_host_nqn(self, subsys, hostnqn): + self.host_nqn[subsys].add(hostnqn) + + def remove_host_nqn(self, subsys, hostnqn): + if not subsys in self.host_nqn: + return + self.host_nqn[subsys].discard(hostnqn) + + def get_host_count(self, subsys): + if not subsys in self.host_nqn: + return 0 + return len(self.host_nqn[subsys]) + def allow_any_host(self, subsys): self.subsys_allow_any_hosts[subsys] = True @@ -157,6 +172,9 @@ def __init__(self, nsid, bdev, uuid, anagrpid, no_auto_visible): self.anagrpid = anagrpid self.host_list = [] + def __str__(self): + return f"nsid: {self.nsid}, bdev: {self.bdev}, uuid: {self.uuid}, no_auto_visible: {self.no_auto_visible}, anagrpid: {self.anagrpid}, hosts: {self.host_list}" + def empty(self) -> bool: if self.bdev or self.uuid: return False @@ -220,20 +238,26 @@ def find_namespace(self, nqn, nsid, uuid = None) -> NamespaceInfo: return NamespacesLocalList.EMPTY_NAMESPACE def get_namespace_count(self, nqn, no_auto_visible = None, min_hosts = 0) -> int: - if nqn not in self.namespace_list: + if nqn and nqn not in self.namespace_list: return 0 + if nqn: + subsystems = [nqn] + else: + subsystems = self.namespace_list.keys() + ns_count = 0 - for nsid in self.namespace_list[nqn]: - ns = self.namespace_list[nqn][nsid] - if ns.empty(): - continue - if no_auto_visible is not None: - if ns.no_auto_visible == no_auto_visible and ns.host_count() >= min_hosts: - ns_count += 1 - else: - if ns.host_count() >= min_hosts: - ns_count += 1 + for one_subsys in subsystems: + for nsid in self.namespace_list[one_subsys]: + ns = self.namespace_list[one_subsys][nsid] + if ns.empty(): + continue + if no_auto_visible is not None: + if ns.no_auto_visible == no_auto_visible and ns.host_count() >= min_hosts: + ns_count += 1 + else: + if ns.host_count() >= min_hosts: + ns_count += 1 return ns_count @@ -336,6 +360,15 @@ def __init__(self, config: GatewayConfig, gateway_state: GatewayStateHandler, rp self.gateway_group = self.config.get_with_default("gateway", "group", "") self.max_hosts_per_namespace = self.config.getint_with_default("gateway", "max_hosts_per_namespace", 1) self.max_namespaces_with_netmask = self.config.getint_with_default("gateway", "max_namespaces_with_netmask", 1000) + self.max_subsystems = self.config.getint_with_default("gateway", "max_subsystems", 128) + if self.max_subsystems <= 0: + self.max_subsystems = 128 + self.max_namespaces = self.config.getint_with_default("gateway", "max_namespaces", 1024) + if self.max_namespaces <= 0: + self.max_namespaces = 1024 + self.max_hosts_per_subsystem = self.config.getint_with_default("gateway", "max_hosts_per_subsystem", 32) + if self.max_hosts_per_subsystem <= 0: + self.max_hosts_per_subsystem = 32 self.gateway_pool = self.config.get_with_default("ceph", "pool", "") self.ana_map = defaultdict(dict) self.cluster_nonce = {} @@ -885,6 +918,12 @@ def create_subsystem_safe(self, request, context): self.logger.error(f"{errmsg}") return pb2.subsys_status(status = errno.EINVAL, error_message = errmsg, nqn = request.subsystem_nqn) + if not request.max_namespaces: + request.max_namespaces = self.max_namespaces + else: + if request.max_namespaces > self.max_namespaces: + self.logger.warning(f"The requested max number of namespaces for subsystem {request.subsystem_nqn} ({request.max_namespaces}) is greater than the global limit on the number of namespaces ({self.max_namespaces}), will continue") + errmsg = "" if not GatewayState.is_key_element_valid(request.subsystem_nqn): errmsg = f"{create_subsystem_error_prefix}: Invalid NQN \"{request.subsystem_nqn}\", contains invalid characters" @@ -903,6 +942,11 @@ def create_subsystem_safe(self, request, context): self.logger.error(f"{errmsg}") return pb2.subsys_status(status = errno.EINVAL, error_message = errmsg, nqn = request.subsystem_nqn) + if len(self.subsys_max_ns) >= self.max_subsystems: + errmsg = f"{create_subsystem_error_prefix}: Maximal number of subsystems ({self.max_subsystems}) has already been reached" + self.logger.error(f"{errmsg}") + return pb2.subsys_status(status = errno.E2BIG, error_message = errmsg, nqn = request.subsystem_nqn) + if context: if request.no_group_append or not self.gateway_group: self.logger.info(f"Subsystem NQN will not be changed") @@ -949,7 +993,7 @@ def create_subsystem_safe(self, request, context): max_cntlid=max_cntlid, ana_reporting = True, ) - self.subsys_max_ns[request.subsystem_nqn] = request.max_namespaces if request.max_namespaces else 32 + self.subsys_max_ns[request.subsystem_nqn] = request.max_namespaces if request.dhchap_key: self.host_info.add_dhchap_key_to_subsystem(request.subsystem_nqn, request.dhchap_key) self.logger.debug(f"create_subsystem {request.subsystem_nqn}: {ret}") @@ -1172,18 +1216,23 @@ def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid, no_au if no_auto_visible and self.subsystem_nsid_bdev_and_uuid.get_namespace_count(subsystem_nqn, True, 0) >= self.max_namespaces_with_netmask: - errmsg = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}: maximal number of namespaces which are not auto visible ({self.max_namespaces_with_netmask}) has already been reached" + errmsg = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}: Maximal number of namespaces which are not auto visible ({self.max_namespaces_with_netmask}) has already been reached" self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.E2BIG, error_message=errmsg) if nsid and nsid > self.subsys_max_ns[subsystem_nqn]: - errmsg = f"Failure adding namespace to {subsystem_nqn}: requested NSID {nsid} is bigger than the maximal one ({self.subsys_max_ns[subsystem_nqn]})" + errmsg = f"Failure adding namespace to {subsystem_nqn}: Requested NSID {nsid} is bigger than the maximal one ({self.subsys_max_ns[subsystem_nqn]})" self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.E2BIG, error_message=errmsg) if not nsid and self.subsystem_nsid_bdev_and_uuid.get_namespace_count(subsystem_nqn, - False, 0) >= self.subsys_max_ns[subsystem_nqn]: - errmsg = f"Failure adding namespace to {subsystem_nqn}: maximal number of namespaces ({self.subsys_max_ns[subsystem_nqn]}) has already been reached" + None, 0) >= self.subsys_max_ns[subsystem_nqn]: + errmsg = f"Failure adding namespace to {subsystem_nqn}: Subsystem's maximal number of namespaces ({self.subsys_max_ns[subsystem_nqn]}) has already been reached" + self.logger.error(f"{errmsg}") + return pb2.req_status(status=errno.E2BIG, error_message=errmsg) + + if self.subsystem_nsid_bdev_and_uuid.get_namespace_count(None, None, 0) >= self.max_namespaces: + errmsg = f"Failure adding namespace to {subsystem_nqn}: Maximal number of namespaces ({self.max_namespaces}) has already been reached" self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.E2BIG, error_message=errmsg) @@ -2411,6 +2460,11 @@ def add_host_safe(self, request, context): self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.EEXIST, error_message=errmsg) + if request.host_nqn != "*" and self.host_info.get_host_count(request.subsystem_nqn) >= self.max_hosts_per_subsystem: + errmsg = f"{host_failure_prefix}: Maximal number of hosts for subsystem ({self.max_hosts_per_subsystem}) has already been reached" + self.logger.error(f"{errmsg}") + return pb2.subsys_status(status = errno.E2BIG, error_message = errmsg, nqn = request.subsystem_nqn) + dhchap_ctrlr_key = self.host_info.get_subsystem_dhchap_key(request.subsystem_nqn) if dhchap_ctrlr_key: self.logger.info(f"Got DHCHAP key {dhchap_ctrlr_key} for subsystem {request.subsystem_nqn}") @@ -2486,6 +2540,7 @@ def add_host_safe(self, request, context): pass if dhchap_file: self.host_info.add_dhchap_host(request.subsystem_nqn, request.host_nqn, request.dhchap_key) + self.host_info.add_host_nqn(request.subsystem_nqn, request.host_nqn) except Exception as ex: if request.host_nqn == "*": self.logger.exception(all_host_failure_prefix) @@ -2609,6 +2664,7 @@ def remove_host_safe(self, request, context): self.host_info.remove_dhchap_host(request.subsystem_nqn, request.host_nqn) self.remove_all_host_key_files(request.subsystem_nqn, request.host_nqn) self.remove_all_host_keys_from_keyring(request.subsystem_nqn, request.host_nqn) + self.host_info.remove_host_nqn(request.subsystem_nqn, request.host_nqn) except Exception as ex: if request.host_nqn == "*": self.logger.exception(all_host_failure_prefix) @@ -3679,6 +3735,9 @@ def get_gateway_info_safe(self, request, context): load_balancing_group = self.group_id + 1, bool_status = True, hostname = self.host_name, + max_subsystems = self.max_subsystems, + max_namespaces = self.max_namespaces, + max_hosts_per_subsystem = self.max_hosts_per_subsystem, status = 0, error_message = os.strerror(0)) cli_ver = self.parse_version(cli_version_string) diff --git a/control/proto/gateway.proto b/control/proto/gateway.proto index 7ab76216..326323fd 100644 --- a/control/proto/gateway.proto +++ b/control/proto/gateway.proto @@ -411,6 +411,9 @@ message gateway_info { optional string spdk_version = 10; uint32 load_balancing_group = 11; string hostname = 12; + optional uint32 max_subsystems = 13; + optional uint32 max_namespaces = 14; + optional uint32 max_hosts_per_subsystem = 15; } message cli_version { diff --git a/tests/test_cli.py b/tests/test_cli.py index 951a25f8..d41440f0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -21,17 +21,27 @@ image8 = "mytestdevimage8" image9 = "mytestdevimage9" image10 = "mytestdevimage10" +image11 = "mytestdevimage11" pool = "rbd" subsystem = "nqn.2016-06.io.spdk:cnode1" subsystem2 = "nqn.2016-06.io.spdk:cnode2" subsystem3 = "nqn.2016-06.io.spdk:cnode3" subsystem4 = "nqn.2016-06.io.spdk:cnode4" subsystem5 = "nqn.2016-06.io.spdk:cnode5" +subsystem6 = "nqn.2016-06.io.spdk:cnode6" +subsystem7 = "nqn.2016-06.io.spdk:cnode7" discovery_nqn = "nqn.2014-08.org.nvmexpress.discovery" serial = "Ceph00000000000001" uuid = "948878ee-c3b2-4d58-a29b-2cff713fc02d" uuid2 = "948878ee-c3b2-4d58-a29b-2cff713fc02e" host_list = ["nqn.2016-06.io.spdk:host1", "*"] +host1 = "nqn.2016-06.io.spdk:host1" +host2 = "nqn.2016-06.io.spdk:host2" +host3 = "nqn.2016-06.io.spdk:host3" +host4 = "nqn.2016-06.io.spdk:host4" +host5 = "nqn.2016-06.io.spdk:host5" +host6 = "nqn.2016-06.io.spdk:host6" +host7 = "nqn.2016-06.io.spdk:host7" nsid = "1" anagrpid = "1" anagrpid2 = "2" @@ -58,6 +68,9 @@ def gateway(config): port = config.getint("gateway", "port") config.config["gateway"]["group"] = group_name config.config["gateway"]["max_namespaces_with_netmask"] = "3" + config.config["gateway"]["max_subsystems"] = "3" + config.config["gateway"]["max_namespaces"] = "11" + config.config["gateway"]["max_hosts_per_subsystem"] = "4" config.config["gateway-logs"]["log_level"] = "debug" ceph_utils = CephUtils(config) @@ -140,6 +153,9 @@ def test_get_gateway_info(self, caplog, gateway): assert gw_info.spdk_version == spdk_ver assert gw_info.name == gw.gateway_name assert gw_info.hostname == gw.host_name + assert gw_info.max_subsystems == 3 + assert gw_info.max_namespaces == 11 + assert gw_info.max_hosts_per_subsystem == 4 assert gw_info.status == 0 assert gw_info.bool_status == True @@ -185,14 +201,15 @@ def test_create_subsystem(self, caplog, gateway): assert f"contains invalid characters" in caplog.text caplog.clear() cli(["subsystem", "add", "--subsystem", subsystem, "--max-namespaces", "2049", "--no-group-append"]) - assert f"create_subsystem {subsystem}: True" in caplog.text + assert f"The requested max number of namespaces for subsystem {subsystem} (2049) is greater than the global limit on the number of namespaces (11), will continue" in caplog.text + assert f"Adding subsystem {subsystem}: Successful" in caplog.text cli(["--format", "json", "subsystem", "list"]) assert f'"serial_number": "{serial}"' not in caplog.text assert f'"nqn": "{subsystem}"' in caplog.text assert f'"max_namespaces": 2049' in caplog.text caplog.clear() cli(["subsystem", "add", "--subsystem", subsystem2, "--serial-number", serial, "--no-group-append"]) - assert f"create_subsystem {subsystem2}: True" in caplog.text + assert f"Adding subsystem {subsystem2}: Successful" in caplog.text caplog.clear() cli(["--format", "json", "subsystem", "list"]) assert f'"serial_number": "{serial}"' in caplog.text @@ -476,7 +493,7 @@ def test_add_all_hosts_to_namespace(self, caplog, gateway): def test_add_too_many_namespaces_to_a_subsystem(self, caplog, gateway): caplog.clear() cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image9, "--nsid", "3000", "--size", "16MB", "--rbd-create-image"]) - assert f"Failure adding namespace to {subsystem}: requested NSID 3000 is bigger than the maximal one" in caplog.text + assert f"Failure adding namespace to {subsystem}: Requested NSID 3000 is bigger than the maximal one" in caplog.text assert f"Received request to delete bdev" in caplog.text caplog.clear() cli(["subsystem", "add", "--subsystem", subsystem5, "--no-group-append", "--max-namespaces", "1"]) @@ -486,7 +503,7 @@ def test_add_too_many_namespaces_to_a_subsystem(self, caplog, gateway): assert f"Adding namespace 1 to {subsystem5}: Successful" in caplog.text caplog.clear() cli(["namespace", "add", "--subsystem", subsystem5, "--rbd-pool", pool, "--rbd-image", image10, "--size", "16MB", "--rbd-create-image"]) - assert f"Failure adding namespace to {subsystem5}: maximal number of namespaces (1) has already been reached" in caplog.text + assert f"Failure adding namespace to {subsystem5}: Subsystem's maximal number of namespaces (1) has already been reached" in caplog.text assert f"Received request to delete bdev" in caplog.text caplog.clear() cli(["subsystem", "del", "--subsystem", subsystem5, "--force"]) @@ -515,7 +532,7 @@ def test_add_host_to_wrong_namespace(self, caplog, gateway): def test_add_too_many_namespaces_with_hosts(self, caplog, gateway): caplog.clear() cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image8, "--size", "16MB", "--rbd-create-image", "--no-auto-visible"]) - assert f"Failure adding namespace to {subsystem}: maximal number of namespaces which are not auto visible (3) has already been reached" in caplog.text + assert f"Failure adding namespace to {subsystem}: Maximal number of namespaces which are not auto visible (3) has already been reached" in caplog.text caplog.clear() cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image8, "--size", "16MB", "--rbd-create-image"]) assert f"Adding namespace 11 to {subsystem}: Successful" in caplog.text @@ -534,6 +551,11 @@ def test_list_namespace_with_no_hosts(self, caplog, gateway): assert f'"no_auto_visible": true' in caplog.text assert f'"hosts": []' in caplog.text + def test_add_too_many_namespaces(self, caplog, gateway): + caplog.clear() + cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image11, "--size", "16MB", "--rbd-create-image"]) + assert f"Failure adding namespace to {subsystem}: Maximal number of namespaces (11) has already been reached" in caplog.text + def test_resize_namespace(self, caplog, gateway): gw, stub = gateway caplog.clear() @@ -775,10 +797,10 @@ def test_add_host_invalid_nqn(self, caplog): def test_host_list(self, caplog): caplog.clear() - cli(["host", "add", "--subsystem", subsystem, "--host-nqn", "nqn.2016-06.io.spdk:host5", "nqn.2016-06.io.spdk:host6", "nqn.2016-06.io.spdk:host7"]) - assert f"Adding host nqn.2016-06.io.spdk:host5 to {subsystem}: Successful" in caplog.text - assert f"Adding host nqn.2016-06.io.spdk:host6 to {subsystem}: Successful" in caplog.text - assert f"Adding host nqn.2016-06.io.spdk:host7 to {subsystem}: Successful" in caplog.text + cli(["host", "add", "--subsystem", subsystem, "--host-nqn", host5, host6, host7]) + assert f"Adding host {host5} to {subsystem}: Successful" in caplog.text + assert f"Adding host {host6} to {subsystem}: Successful" in caplog.text + assert f"Adding host {host7} to {subsystem}: Successful" in caplog.text @pytest.mark.parametrize("listener", listener_list) def test_create_listener(self, caplog, listener, gateway): @@ -1073,8 +1095,7 @@ def test_create_subsys_group_name(self, caplog, gateway): cli(["subsystem", "add", "--subsystem", subsystem3]) assert f"Adding subsystem {subsystem3}.{group_name}: Successful" in caplog.text assert f"Subsystem NQN was changed to {subsystem3}.{group_name}, adding the group name" in caplog.text - assert f"create_subsystem {subsystem3}.{group_name}: True" in caplog.text - assert f"create_subsystem {subsystem3}: True" not in caplog.text + assert f"Adding subsystem {subsystem3}: Successful" not in caplog.text cli(["--format", "json", "subsystem", "list"]) assert f'"nqn": "{subsystem3}.{group_name}"' in caplog.text assert f'"nqn": "{subsystem3}"' not in caplog.text @@ -1082,12 +1103,37 @@ def test_create_subsys_group_name(self, caplog, gateway): cli(["subsystem", "add", "--subsystem", subsystem4, "--no-group-append"]) assert f"Adding subsystem {subsystem4}: Successful" in caplog.text assert f"Subsystem NQN will not be changed" in caplog.text - assert f"create_subsystem {subsystem4}.{group_name}: True" not in caplog.text - assert f"create_subsystem {subsystem4}: True" in caplog.text + assert f"Adding subsystem {subsystem4}.{group_name}: Successful" not in caplog.text cli(["--format", "json", "subsystem", "list"]) assert f'"nqn": "{subsystem4}.{group_name}"' not in caplog.text assert f'"nqn": "{subsystem4}"' in caplog.text +class TestTooManySubsystemsAndHosts: + def test_add_too_many_subsystem(self, caplog, gateway): + caplog.clear() + cli(["subsystem", "add", "--subsystem", subsystem6, "--no-group-append"]) + assert f"Adding subsystem {subsystem6}: Successful" in caplog.text + caplog.clear() + cli(["subsystem", "add", "--subsystem", subsystem7, "--no-group-append"]) + assert f"Failure creating subsystem {subsystem7}: Maximal number of subsystems (3) has already been reached" in caplog.text + + def test_too_many_hosts(self, caplog, gateway): + caplog.clear() + cli(["host", "add", "--subsystem", subsystem6, "--host-nqn", host1]) + assert f"Adding host {host1} to {subsystem6}: Successful" in caplog.text + caplog.clear() + cli(["host", "add", "--subsystem", subsystem6, "--host-nqn", host2]) + assert f"Adding host {host2} to {subsystem6}: Successful" in caplog.text + caplog.clear() + cli(["host", "add", "--subsystem", subsystem6, "--host-nqn", host3]) + assert f"Adding host {host3} to {subsystem6}: Successful" in caplog.text + caplog.clear() + cli(["host", "add", "--subsystem", subsystem6, "--host-nqn", host4]) + assert f"Adding host {host4} to {subsystem6}: Successful" in caplog.text + caplog.clear() + cli(["host", "add", "--subsystem", subsystem6, "--host-nqn", host5]) + assert f"Failure adding host {host5} to {subsystem6}: Maximal number of hosts for subsystem (4) has already been reached" in caplog.text + class TestGwLogLevel: def test_gw_log_level(self, caplog, gateway): caplog.clear()