From c188a68e2b487191f1f3004e22b68c21e26c3f2e Mon Sep 17 00:00:00 2001
From: Geoff Anderson
Date: Sun, 20 Mar 2016 16:46:42 -0700
Subject: [PATCH] MINOR: Add vagrant up wrapper for simple parallel bringup on aws

The main impediment to bringing up aws machines in parallel using vagrant
was the interaction between `vagrant-hostmanager` and `vagrant-aws`. If you
disable hostmanager during the `up` phase, and run it after the cluster is
up, parallel bringup is possible. The only caveat is that machines must be
brought up in small-ish batches to prevent rate limit errors from AWS,
since `vagrant-aws` doesn't seem to have mechanisms to throttle the rate of
its requests to the AWS API.

This PR:
- disables `vagrant-hostmanager` during bringup
- adds a wrapper script to make it convenient to bring machines up in
  batches on aws

Author: Geoff Anderson

Reviewers: Ewen Cheslack-Postava

Closes #982 from granders/vagrant-disable-hostmanager
---
 Vagrantfile                               |   4 +-
 tests/README.md                           |   9 +-
 vagrant/README.md                         |  17 +-
 vagrant/aws/aws-example-Vagrantfile.local |   1 +
 vagrant/vagrant-up.sh                     | 237 ++++++++++++++++++++++
 5 files changed, 258 insertions(+), 10 deletions(-)
 create mode 100755 vagrant/vagrant-up.sh

diff --git a/Vagrantfile b/Vagrantfile
index 51066ff5bde53..0471a7e78df2f 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -22,6 +22,8 @@ VAGRANTFILE_API_VERSION = "2"
 
 # General config
 enable_dns = false
+# Override to false when bringing up a cluster on AWS
+enable_hostmanager = true
 enable_jmx = false
 num_zookeepers = 1
 num_brokers = 3
@@ -55,7 +57,7 @@ end
 # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered.
 Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
-  config.hostmanager.enabled = true
+  config.hostmanager.enabled = enable_hostmanager
   config.hostmanager.manage_host = enable_dns
   config.hostmanager.include_offline = false
diff --git a/tests/README.md b/tests/README.md
index 045732f49f5e1..143711d8b4927 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -26,7 +26,8 @@ https://cwiki.apache.org/confluence/display/KAFKA/tutorial+-+set+up+and+run+Kafk
 
 * Bring up the test cluster
 
-    $ vagrant up
+    $ vagrant/vagrant-up.sh
+    $ # When using Virtualbox, it also works to run: vagrant up
 
 * Build the desired branch of Kafka
 
@@ -111,6 +112,7 @@ the test driver machine.
     ec2_instance_type = "..." # Pick something appropriate for your
                               # test. Note that the default m3.medium has
                               # a small disk.
+    enable_hostmanager = false
     num_zookeepers = 0
     num_kafka = 0
     num_workers = 9
@@ -120,9 +122,10 @@ the test driver machine.
     ec2_region = 'us-west-2'
     ec2_ami = "ami-29ebb519"
 
-* Start up the instances (note we have found bringing up machines in parallel can cause errors on aws):
+* Start up the instances:
 
-    $ vagrant up --provider=aws --no-provision --no-parallel && vagrant provision
+    # This will bring up worker machines in small parallel batches
+    $ vagrant/vagrant-up.sh --aws
 
 * Now you should be able to run tests:
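To make the batching concrete: given a cluster with one zookeeper, one broker, and three workers, `vagrant/vagrant-up.sh --aws --max-parallel 2` runs roughly the following sequence under the hood (machine names here are illustrative; the real names come from `vagrant status`):

    $ vagrant up --provider=aws --no-parallel --no-provision zk1 broker1
    $ vagrant hostmanager
    $ vagrant provision
    $ vagrant up --provider=aws worker1 worker2
    $ vagrant up --provider=aws worker3
    $ vagrant hostmanager

Zookeeper and broker machines still come up serially; only the stateless test workers are batched.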
diff --git a/vagrant/README.md b/vagrant/README.md
index 47c78767ddfd0..7021010bdc4cc 100644
--- a/vagrant/README.md
+++ b/vagrant/README.md
@@ -27,11 +27,15 @@ clusters concurrently.
 
 Now bring up the cluster:
 
-    $ vagrant up --no-provision && vagrant provision
+    $ vagrant/vagrant-up.sh
+    $ # If on aws, run: vagrant/vagrant-up.sh --aws
 
-We separate out the two steps (bringing up the base VMs and configuring them)
+(This essentially runs vagrant up --no-provision && vagrant hostmanager && vagrant provision)
+
+We separate out the steps (bringing up the base VMs, mapping hostnames, and configuring the VMs)
 due to current limitations in ZooKeeper (ZOOKEEPER-1506) that require us to
-collect IPs for all nodes before starting ZooKeeper nodes.
+collect IPs for all nodes before starting ZooKeeper nodes. Breaking into multiple steps
+also allows us to bring machines up in parallel on AWS.
 
 Once this completes:
@@ -66,7 +70,7 @@ the cluster to your most recent development version.
 
 Finally, you can clean up the cluster by destroying all the VMs:
 
-    vagrant destroy
+    vagrant destroy -f
 
 ## Configuration ##
@@ -75,6 +79,7 @@ You can override some default settings by specifying the values in
 only ever need to change a few simple configuration variables. Some values
 you might want to override:
 
+* `enable_hostmanager` - true by default; override to false if on AWS to allow parallel cluster bringup.
 * `enable_dns` - Register each VM with a hostname in /etc/hosts on the
   hosts. Hostnames are always set in the /etc/hosts in the VMs, so this is
   only necessary if you want to address them conveniently from the host for tasks
@@ -96,7 +101,7 @@ Install the `vagrant-aws` plugin to provide EC2 support:
 
     $ vagrant plugin install vagrant-aws
 
 Next, configure parameters in `Vagrantfile.local`. A few are *required*:
-`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and
+`enable_hostmanager`, `enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and
 `ec2_security_groups`. A couple of important notes:
 
 1. You definitely want to use `enable_dns` if you plan to run clients outside of
@@ -122,7 +127,7 @@ Next, configure parameters in `Vagrantfile.local`. A few are *required*:
 
 Now start things up, but specify the aws provider:
 
-    $ vagrant up --provider=aws --no-parallel --no-provision && vagrant provision
+    $ vagrant/vagrant-up.sh --aws
 
 Your instances should get tagged with a name including your hostname to make
 them identifiable and make it easier to track instances in the AWS management
diff --git a/vagrant/aws/aws-example-Vagrantfile.local b/vagrant/aws/aws-example-Vagrantfile.local
index 00b3d6164ee71..853671eb6f3ab 100644
--- a/vagrant/aws/aws-example-Vagrantfile.local
+++ b/vagrant/aws/aws-example-Vagrantfile.local
@@ -18,6 +18,7 @@
 # To use it, move it to the base kafka directory and rename
 # it to Vagrantfile.local, and adjust variables as needed.
 ec2_instance_type = "m3.xlarge"
+enable_hostmanager = false
 num_zookeepers = 0
 num_brokers = 0
 num_workers = 9
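With hostmanager disabled as in the example config above, the wrapper script (added below) takes over sequencing. Its flags, as defined by its argument parser, can be combined like this (batch sizes shown are illustrative):

    $ vagrant/vagrant-up.sh                         # local (Virtualbox) bringup
    $ vagrant/vagrant-up.sh --aws                   # AWS bringup in batches of 5 (the default)
    $ vagrant/vagrant-up.sh --aws --max-parallel 3  # smaller batches, e.g. if AWS rate-limits requests
    $ vagrant/vagrant-up.sh --aws --no-parallel     # fully serial bringup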
diff --git a/vagrant/vagrant-up.sh b/vagrant/vagrant-up.sh
new file mode 100755
index 0000000000000..ad5d5be0dce73
--- /dev/null
+++ b/vagrant/vagrant-up.sh
@@ -0,0 +1,237 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o nounset
+set -o errexit # exit script if any command exits with nonzero value
+
+readonly PROG_NAME=$(basename $0)
+readonly PROG_DIR=$(dirname $(realpath $0))
+readonly INVOKE_DIR=$(pwd)
+readonly ARGS="$@"
+
+# overridable defaults
+AWS=false
+PARALLEL=true
+MAX_PARALLEL=5
+
+readonly USAGE="Usage: $PROG_NAME [-h | --help] [--aws [--no-parallel] [--max-parallel MAX]]"
+readonly HELP="$(cat <<EOF
+Tool to bring up a vagrant cluster, either locally or on aws.
+
+    -h | --help         Show this help message
+    --aws               Use when bringing up a cluster on AWS
+    --no-parallel       Bring machines up serially; only applicable with --aws
+    --max-parallel MAX  Max number of machines to bring up in parallel; only applicable with --aws. Default: 5
+EOF
+)"
+
+function help {
+    echo "$USAGE"
+    echo "$HELP"
+    exit 0
+}
+
+while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+        -h | --help)
+            help
+            ;;
+        --aws)
+            AWS=true
+            ;;
+        --no-parallel)
+            PARALLEL=false
+            ;;
+        --max-parallel)
+            MAX_PARALLEL="$2"
+            shift
+            ;;
+        *)
+            # unknown option
+            echo "Unknown option $1"
+            exit 1
+            ;;
+    esac
+    shift # past argument or value
+done
+
+# Get a list of vagrant machines (in any state)
+function read_vagrant_machines {
+    local ignore_state="ignore"
+    local reading_state="reading"
+
+    local state="$ignore_state"
+    local machines=""
+
+    while read -r line; do
+        # Lines before the first empty line are ignored.
+        # The first empty line triggers a change from ignore state to reading state.
+        # When in reading state, we parse in machine names until we hit the next empty line,
+        # which signals that we're done parsing.
+        if [[ -z "$line" ]]; then
+            if [[ "$state" == "$ignore_state" ]]; then
+                state="$reading_state"
+            else
+                # all done
+                echo "$machines"
+                return
+            fi
+            continue
+        fi
+
+        # Parse machine name while in reading state
+        if [[ "$state" == "$reading_state" ]]; then
+            line=$(echo "$line" | cut -d ' ' -f 1)
+            if [[ -z "$machines" ]]; then
+                machines="$line"
+            else
+                machines="${machines} ${line}"
+            fi
+        fi
+    done < <(vagrant status)
+}
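For reference, `read_vagrant_machines` assumes `vagrant status` output shaped roughly like the sample below (machine names and states are illustrative): everything before the first blank line is skipped, then the first whitespace-delimited token of each line is collected until the next blank line.

    Current machine states:

    zk1                       running (aws)
    broker1                   running (aws)
    worker1                   not created (aws)

    This environment represents multiple VMs...

Parsing the sample above yields the list "zk1 broker1 worker1".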
-z "$(echo $item | grep "$pattern")" ]]; then + result="$result $item" + fi + done + echo "$result" +} + +# Given a list of machine names, return only test worker machines +function worker { + local machines="$1" + local workers=$(filter "$machines" "worker") + workers=$(echo "$workers" | xargs) # trim leading/trailing whitespace + echo "$workers" +} + +# Given a list of machine names, return only zookeeper and broker machines +function zk_broker { + local machines="$1" + local zk_broker_list=$(filter "$machines" "zk") + zk_broker_list="$zk_broker_list $(filter "$machines" "broker")" + zk_broker_list=$(echo "$zk_broker_list" | xargs) # trim leading/trailing whitespace + echo "$zk_broker_list" +} + +# Run a vagrant command on batches of machines of size $group_size +# This is annoying but necessary on aws to avoid errors due to AWS request rate +# throttling +# +# Example +# $ vagrant_batch_command "vagrant up" "m1 m2 m3 m4 m5" "2" +# +# This is equivalent to running "vagrant up" on groups of machines of size 2 or less, i.e.: +# $ vagrant up m1 m2 +# $ vagrant up m3 m4 +# $ vagrant up m5 +function vagrant_batch_command { + local vagrant_cmd="$1" + local machines="$2" + local group_size="$3" + + local count=1 + local m_group="" + # Using --provision flag makes this command useable both when bringing up a cluster from scratch, + # and when bringing up a halted cluster. Permissions on certain directores set during provisioning + # seem to revert when machines are halted, so --provision ensures permissions are set correctly in all cases + for machine in $machines; do + m_group="$m_group $machine" + + if [[ $(expr $count % $group_size) == 0 ]]; then + # We've reached a full group + # Bring up this part of the cluster + $vagrant_cmd $m_group + m_group="" + fi + ((count++)) + done + + # Take care of any leftover partially complete group + if [[ ! -z "$m_group" ]]; then + $vagrant_cmd $m_group + fi +} + +# We assume vagrant-hostmanager is installed, but may or may not be disabled during vagrant up +# In this fashion, we ensure we run hostmanager after machines are up, and before provisioning. +# This sequence of commands is necessary for example for bringing up a multi-node zookeeper cluster +function bring_up_local { + vagrant up --no-provision + vagrant hostmanager + vagrant provision +} + +function bring_up_aws { + local parallel="$1" + local max_parallel="$2" + local machines="$(read_vagrant_machines)" + + zk_broker_machines=$(zk_broker "$machines") + worker_machines=$(worker "$machines") + + if [[ "$parallel" == "true" ]]; then + if [[ ! -z "$zk_broker_machines" ]]; then + # We still have to bring up zookeeper/broker nodes serially + echo "Bringing up zookeeper/broker machines serially" + vagrant up --provider=aws --no-parallel --no-provision $zk_broker_machines + vagrant hostmanager + vagrant provision + fi + + if [[ ! -z "$worker_machines" ]]; then + echo "Bringing up test worker machines in parallel" + vagrant_batch_command "vagrant up --provider=aws" "$worker_machines" "$max_parallel" + vagrant hostmanager + fi + else + vagrant up --provider=aws --no-parallel --no-provision + vagrant hostmanager + vagrant provision + fi +} + +function main { + if [[ "$AWS" == "true" ]]; then + bring_up_aws "$PARALLEL" "$MAX_PARALLEL" + else + bring_up_local + fi +} + +main