From 47d395a08e790a888ff61996224e8f7e5b5e2a0a Mon Sep 17 00:00:00 2001 From: Ofer Gill Date: Fri, 6 Aug 2021 17:13:53 -0600 Subject: [PATCH] AUE-129: Allows IP CIDR and Docker names to be changed at command-line - IP CIDR dictates the ip's & cidr for the docker network, each docker node, and the zk ensemble - Docker names change the image name, network name, and node prefix used when RCTC invokes docker --- testing/cluster_test_utils.py | 72 +++++++++++++++++++++++++------- testing/ramcloud_test_cluster.py | 25 +++++++++-- 2 files changed, 79 insertions(+), 18 deletions(-) diff --git a/testing/cluster_test_utils.py b/testing/cluster_test_utils.py index 7f8b059..8c191ba 100644 --- a/testing/cluster_test_utils.py +++ b/testing/cluster_test_utils.py @@ -37,6 +37,41 @@ ten_minutes = 600 # number of seconds in 10 minutes +cluster_cidr = "169.254.3.0/24" +cluster_ip_prefix = "169.254.3" +cluster_notation = 24 + +docker_image_name = "ramcloud-test" +docker_network_name = "ramcloud-net" +docker_node_prefix = "ramcloud-node" + +def set_cluster_cidr(val): + # We are changing these global variables in this method! + global cluster_cidr, cluster_ip_prefix, cluster_notation + cluster_cidr=val + cn = cluster_cidr.split('/') + if (len(cn) < 2): + logger.error("Missing required '/' in cidr, provided: {}".format(cluster_cidr)) + exit(1) + cluster_notation = int(cn[1]) + if (cluster_notation != 16 and cluster_notation != 24): + logger.error("Only cidr notation of /16 or /24 is supported at the moment, provided: {}".format(cluster_cidr)) + exit(1) + ip_parts = cn[0].split('.') + if (len(ip_parts) < 4): + logger.error("IPv4 format with 4 numbers expected, provided: {}".format(cluster_cidr)) + exit(1) + cluster_ip_prefix = '.'.join(ip_parts[0:3]) + +def set_docker_names(val): + # We are changing these global variables in this method! + global docker_image_name, docker_network_name, docker_node_prefix + names = val.split(',') + if (len(names) < 3): + logger.error("Three names required, provided: {}".format(val)) + exit(1) + (docker_image_name, docker_network_name, docker_node_prefix) = names[0:3] + def get_zookeeper_client(ensemble, read_only=True): client = kazoo.client.KazooClient(hosts=external_storage_string(ensemble), read_only=read_only) client.start() @@ -55,20 +90,27 @@ def ensemble_servers_string(ensemble): return ' '.join(['server.{}={}:2888:3888;2181'.format(zkid, ip) for (zkid, ip) in list(ensemble.items())]) def get_node_image(): - existing_images = docker_client.images.list(name="ramcloud-test") + existing_images = docker_client.images.list(name=docker_image_name) if (len(existing_images) > 0): - logger.info('Found existing ramcloud-test image, using that...') + logger.info('Found existing {} image, using that...'.format(docker_image_name)) return existing_images[0] - logger.info('Building ramcloud-test-node image...') + logger.info('Building {} image...'.format(docker_image_name)) node_image = docker_client.images.build(path='/src', dockerfile='/src/config/Dockerfile.node', - tag='ramcloud-test')[0] - logger.info('Building ramcloud-test-node image...succeeded') + tag=docker_image_name)[0] + logger.info('Building {} image...succeeded'.format(docker_image_name)) return node_image def make_docker_network(name, subnet): logger.info('Creating docker network %s on subnet %s...', name, subnet) - ramcloud_net_pool = docker.types.IPAMPool(subnet=subnet) + # When cluster_notation is 16, gateway is None (auto-determined), and when cluster_notation is 24, + # then the gateway should be a.b.c.254, with a.b.c dictated by cluster_ip_prefix + # NOTE: There are probably other valid cluster_notation values that work with docker.types.IPAMPool, + # but this requires a bit of careful time and experimentation to find out. + gateway=None + if (cluster_notation == 24): + gateway="{}.254".format(cluster_ip_prefix) + ramcloud_net_pool = docker.types.IPAMPool(subnet=subnet, gateway=gateway) ramcloud_net_config = docker.types.IPAMConfig(pool_configs=[ramcloud_net_pool]) network = docker_client.networks.create(name, ipam=ramcloud_net_config, check_duplicate=True) logger.info('Creating docker network %s on subnet %s...succeeded', name, subnet) @@ -108,10 +150,10 @@ def launch_node(cluster_name, hostname, zk_servers, external_storage, zkid, ip, return docker_client.containers.get(container_id) def get_status(): - docker_containers = docker_client.containers.list(all=True, filters={"name":"ramcloud-node-*"}) + docker_containers = docker_client.containers.list(all=True, filters={"name":"{}-*".format(docker_node_prefix)}) docker_network = False try: - docker_network = docker_client.networks.get("ramcloud-net") + docker_network = docker_client.networks.get(docker_network_name) except docker.errors.NotFound as nf: pass if not docker_containers: @@ -136,7 +178,9 @@ def destroy_network_and_containers(docker_network, docker_containers): print("unable to destroy containers and/or network") def get_ensemble(num_nodes = 3): - return {i: '10.0.1.{}'.format(i) for i in range(1, num_nodes + 1)} + # NOTE: There is probably a more flexible way to support ip's for the nodes, + # but the manner shown here works when cluster_notation is 16 or 24 + return {i: '{}.{}'.format(cluster_ip_prefix, i) for i in range(1, num_nodes + 1)} def get_table_names(ensemble): try: @@ -237,27 +281,27 @@ def setUp(self, num_nodes = 4): assert (num_nodes >= 3), ("num_nodes(%s) must be at least 3."%num_nodes) # clean out any old docker fixtures - docker_containers = docker_client.containers.list(all=True, filters={"name":"ramcloud-node-*"}) + docker_containers = docker_client.containers.list(all=True, filters={"name":"{}-*".format(docker_node_prefix)}) try: for dc in docker_containers: print("removing container:", dc.name) dc.remove(force=True) - docker_network = docker_client.networks.get("ramcloud-net") + docker_network = docker_client.networks.get(docker_network_name) print("removing network:", docker_network); docker_network.remove() except docker.errors.NotFound as nf: # NotFound is ignored because we're trying to remove the network whether it's there or not pass - self.ramcloud_network = make_docker_network('ramcloud-net', '10.0.0.0/16') + self.ramcloud_network = make_docker_network(docker_network_name, cluster_cidr) self.node_image = get_node_image() self.rc_client = ramcloud.RAMCloud() self.node_containers = {} - self.ensemble = {i: '10.0.1.{}'.format(i) for i in range(1, num_nodes + 1)} + self.ensemble = {i: '{}.{}'.format(cluster_ip_prefix, i) for i in range(1, num_nodes + 1)} zk_servers = ensemble_servers_string(self.ensemble) external_storage = 'zk:' + external_storage_string(self.ensemble) for i in range(1, num_nodes + 1): - hostname = 'ramcloud-node-{}'.format(i) + hostname = '{}-{}'.format(docker_node_prefix, i) self.node_containers[self.ensemble[i]] = launch_node('main', hostname, zk_servers, diff --git a/testing/ramcloud_test_cluster.py b/testing/ramcloud_test_cluster.py index aa1c3cf..6ad1ac8 100644 --- a/testing/ramcloud_test_cluster.py +++ b/testing/ramcloud_test_cluster.py @@ -8,26 +8,43 @@ # >>> import ramcloud # >>> import cluster_test_utils as ctu # >>> rc = ramcloud.RAMCloud() -# >>> rc.connect('zk:10.0.1.1:2181,10.0.1.2:2181,10.0.1.3:2181', 'main') +# >>> rc.connect('zk:169.254.3.1:2181,169.254.3.2:2181,169.254.3.3:2181', 'main') # >>> rc.create_table('test') # >>> tid = rc.get_table_id('test') # >>> rc.write(tid, 'testKey', 'testValue') # >>> rc.read(tid, 'testKey') if __name__ == '__main__': - parser = argparse.ArgumentParser() + # We list all argument default values as part of the "help menu" + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--action', '-a', metavar='A', type=str, default="status", - help="Defines the action to take: status, reset, log, start, stop") + help="Defines the action to take, which is one of: status reset log start stop") parser.add_argument('--nodes', '-n', type=int, default=3, - help="Number of zk, rc-coordinator, and rc-server instances to bring up. Only relevant when there's no cluster up yet. Default is 3") + help="Number of zk, rc-coordinator, and rc-server instances to bring up. Only relevant when there's no cluster up yet.") parser.add_argument('--path', '-p', type=str, default="/src/tmp", help="Path to place logs in when action is set to \"log\"") + parser.add_argument('--cidr', '-c', type=str, default="169.254.3.0/24", + help="IPv4 CIDR to use for the docker network, docker nodes, and zk ensemble in the RAMCloud test cluster. " + "NOTE that only CIDR notations of /24 or /16 are supported at the moment in this program.") + parser.add_argument('--docker-names', '-d', type=str, default="ramcloud-test,ramcloud-net,ramcloud-node", + help="Three comma-separated names without spaces, corresponding to IMAGE,NETWORK,NODE where: " + "IMAGE is the name of the docker image to either look for or build, " + "NETWORK is the name of the docker network to create (not an IP address), " + "and NODE is the prefix to use for the names of the docker containers corresponding to the nodes, and " + "appears as NODE-1, NODE-2, NODE-3, etc.") args = parser.parse_args() print("action =",args.action) print("nodes =",args.nodes) print("path =",args.path) + +ctu.set_cluster_cidr(args.cidr) +ctu.set_docker_names(args.docker_names) + +print("cidr = {}.0/{}".format(ctu.cluster_ip_prefix, ctu.cluster_notation)) +print("docker_names = {},{},{}".format(ctu.docker_image_name, ctu.docker_network_name, ctu.docker_node_prefix)) + if (args.action == "start"): x = ctu.ClusterTest() x.setUp(num_nodes = args.nodes)