diff --git a/.gitignore b/.gitignore
index 99b32a6770e3d..06a64184eaa53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,9 @@ TAGS
 .settings
 .gradle
 kafka.ipr
-kafka.iws
\ No newline at end of file
+kafka.iws
+.vagrant
+Vagrantfile.local
+
+config/server-*
+config/zookeeper-*
diff --git a/README.md b/README.md
index 9aca90664b2a8..11dfdf9379ad1 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,10 @@ Please note for this to work you should create/update `~/.gradle/gradle.properti
 ### Determining how transitive dependencies are added ###
     ./gradlew core:dependencies --configuration runtime
 
+### Running in Vagrant ###
+
+See [vagrant/README.md](vagrant/README.md).
+
 ### Contribution ###
 
 Apache Kafka is interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html).
diff --git a/Vagrantfile b/Vagrantfile
new file mode 100644
index 0000000000000..55c67ddda4581
--- /dev/null
+++ b/Vagrantfile
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+require 'socket'
+
+# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
+VAGRANTFILE_API_VERSION = "2"
+
+# General config
+enable_dns = false
+num_zookeepers = 1
+num_brokers = 3
+num_workers = 0 # Generic workers that get the code, but don't start any services
+ram_megabytes = 1280
+
+# EC2
+ec2_access_key = ENV['AWS_ACCESS_KEY']
+ec2_secret_key = ENV['AWS_SECRET_KEY']
+ec2_keypair_name = nil
+ec2_keypair_file = nil
+
+ec2_region = "us-east-1"
+ec2_az = nil # nil leaves the availability zone choice to AWS
+ec2_ami = "ami-9eaa1cf6"
+ec2_instance_type = "m3.medium"
+ec2_user = "ubuntu"
+ec2_security_groups = nil
+ec2_subnet_id = nil
+# Only override this by setting it to false if you're running in a VPC and you
+# are running Vagrant from within that VPC as well.
+ec2_associate_public_ip = nil
+
+# Optional, git-ignored overrides. Vagrantfile.local is evaluated as Ruby with
+# this file's binding so it can reassign the settings above; it must only ever
+# contain trusted code.
+local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local")
+if File.exist?(local_config_file)
+  eval(File.read(local_config_file), binding, "Vagrantfile.local")
+end
+
+# TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered.
+Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
+  config.hostmanager.enabled = true
+  config.hostmanager.manage_host = enable_dns
+  config.hostmanager.include_offline = false
+
+  ## Provider-specific global configs
+  config.vm.provider :virtualbox do |vb, override|
+    override.vm.box = "ubuntu/trusty64"
+
+    override.hostmanager.ignore_private_ip = false
+
+    # Brokers started with the standard script currently set Xms and Xmx to 1G,
+    # plus we need some extra head room.
+    vb.customize ["modifyvm", :id, "--memory", ram_megabytes.to_s]
+
+    if Vagrant.has_plugin?("vagrant-cachier")
+      override.cache.scope = :box
+      # Besides the defaults, we use a custom cache to handle the Oracle JDK
+      # download, which downloads via wget during an apt install. Because of the
+      # way the installer ends up using its cache directory, we need to jump
+      # through some hoops instead of just specifying a cache directly -- we
+      # share to a temporary location and the provisioning scripts copy data
+      # to the right location.
+      override.cache.enable :generic, {
+        "oracle-jdk7" => { cache_dir: "/tmp/oracle-jdk7-installer-cache" },
+      }
+    end
+  end
+
+  config.vm.provider :aws do |aws, override|
+    # The "box" is specified as an AMI
+    override.vm.box = "dummy"
+    override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box"
+
+    override.hostmanager.ignore_private_ip = true
+
+    override.ssh.username = ec2_user
+    override.ssh.private_key_path = ec2_keypair_file
+
+    aws.access_key_id = ec2_access_key
+    aws.secret_access_key = ec2_secret_key
+    aws.keypair_name = ec2_keypair_name
+
+    aws.region = ec2_region
+    aws.availability_zone = ec2_az
+    aws.instance_type = ec2_instance_type
+    aws.ami = ec2_ami
+    aws.security_groups = ec2_security_groups
+    aws.subnet_id = ec2_subnet_id
+    # If a subnet is specified, default to turning on a public IP unless the
+    # user explicitly specifies the option. Without a public IP, Vagrant won't
+    # be able to SSH into the hosts unless Vagrant is also running in the VPC.
+    if ec2_associate_public_ip.nil?
+      aws.associate_public_ip = true unless ec2_subnet_id.nil?
+    else
+      aws.associate_public_ip = ec2_associate_public_ip
+    end
+
+    # Exclude some directories that can grow very large from syncing. The
+    # option key is rsync__exclude (double underscore); it goes through
+    # `override` so that it only applies when the aws provider is in use.
+    override.vm.synced_folder ".", "/vagrant", type: "rsync", rsync__exclude: ['.git', 'core/data/', 'logs/', 'system_test/']
+  end
+
+  # Sets the guest hostname and, for the aws provider, a Name tag that includes
+  # the launching machine's hostname.
+  def name_node(node, name)
+    node.vm.hostname = name
+    node.vm.provider :aws do |aws|
+      aws.tags = { 'Name' => "kafka-vagrant-#{Socket.gethostname}-#{name}" }
+    end
+  end
+
+  # Gives the node a fixed private-network IP under the virtualbox provider.
+  def assign_local_ip(node, ip_address)
+    node.vm.provider :virtualbox do |vb, override|
+      override.vm.network :private_network, ip: ip_address
+    end
+  end
+
+  ## Cluster definition
+  zookeepers = []
+  (1..num_zookeepers).each do |i|
+    name = "zk#{i}"
+    zookeepers.push(name)
+    config.vm.define name do |zookeeper|
+      name_node(zookeeper, name)
+      ip_address = "192.168.50.#{10 + i}"
+      assign_local_ip(zookeeper, ip_address)
+      zookeeper.vm.provision "shell", path: "vagrant/base.sh"
+      zookeeper.vm.provision "shell", path: "vagrant/zk.sh", args: [i.to_s, num_zookeepers.to_s]
+    end
+  end
+
+  (1..num_brokers).each do |i|
+    name = "broker#{i}"
+    config.vm.define name do |broker|
+      name_node(broker, name)
+      ip_address = "192.168.50.#{50 + i}"
+      assign_local_ip(broker, ip_address)
+      # We need to be careful about what we list as the publicly routable
+      # address since this is registered in ZK and handed out to clients. If
+      # host DNS isn't set up, we shouldn't use hostnames -- IP addresses must be
+      # used to support clients running on the host.
+      zookeeper_connect = zookeepers.map { |zk| "#{zk}:2181" }.join(",")
+      broker.vm.provision "shell", path: "vagrant/base.sh"
+      broker.vm.provision "shell", path: "vagrant/broker.sh", args: [i.to_s, enable_dns ? name : ip_address, zookeeper_connect]
+    end
+  end
+
+  (1..num_workers).each do |i|
+    name = "worker#{i}"
+    config.vm.define name do |worker|
+      name_node(worker, name)
+      ip_address = "192.168.50.#{100 + i}"
+      assign_local_ip(worker, ip_address)
+      worker.vm.provision "shell", path: "vagrant/base.sh"
+    end
+  end
+
+end
diff --git a/vagrant/README.md b/vagrant/README.md
new file mode 100644
index 0000000000000..73cf0390bc4c7
--- /dev/null
+++ b/vagrant/README.md
@@ -0,0 +1,126 @@
+# Apache Kafka #
+
+Using Vagrant to get up and running.
+
+1) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/)
+2) Install Vagrant >= 1.6.4 [http://www.vagrantup.com/](http://www.vagrantup.com/)
+3) Install Vagrant Plugins:
+
+    # Required
+    $ vagrant plugin install vagrant-hostmanager
+    # Optional
+    $ vagrant plugin install vagrant-cachier # Caches & shares package downloads across VMs
+
+In the main Kafka folder, do a normal Kafka build:
+
+    $ gradle
+    $ ./gradlew jar
+
+You can override default settings in `Vagrantfile.local`, which is a Ruby file
+that is ignored by git and imported into the Vagrantfile.
+One setting you likely want to enable
+in `Vagrantfile.local` is `enable_dns = true` to put hostnames in the host's
+/etc/hosts file. You probably want this to avoid having to use IP addresses when
+addressing the cluster from outside the VMs, e.g. if you run a client on the
+host. It's disabled by default since it requires `sudo` access, mucks with your
+system state, and breaks with naming conflicts if you try to run multiple
+clusters concurrently.
+
+Now bring up the cluster:
+
+    $ vagrant up --no-provision && vagrant provision
+
+We separate out the two steps (bringing up the base VMs and configuring them)
+due to current limitations in ZooKeeper (ZOOKEEPER-1506) that require us to
+collect IPs for all nodes before starting ZooKeeper nodes.
+
+Once this completes:
+
+* ZooKeeper will be running on 192.168.50.11 (and `zk1` if you used enable_dns)
+* Broker 1 on 192.168.50.51 (and `broker1` if you used enable_dns)
+* Broker 2 on 192.168.50.52 (and `broker2` if you used enable_dns)
+* Broker 3 on 192.168.50.53 (and `broker3` if you used enable_dns)
+
+To log into one of the machines (e.g. the first broker):
+
+    vagrant ssh broker1
+
+You can access the brokers and ZooKeeper by their IP or hostname, e.g.
+
+    # Specify ZooKeeper node 1 by its IP: 192.168.50.11
+    bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox
+
+    # Specify brokers by their hostnames: broker1, broker2, broker3
+    bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox
+
+    # Specify ZooKeeper node by its hostname: zk1
+    bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning
+
+If you need to update the running cluster, you can re-run the provisioner (the
+step that installs software and configures services):
+
+    vagrant provision
+
+Note that this doesn't currently ensure a fresh start -- old cluster state will
+still remain intact after everything restarts. This can be useful for updating
+the cluster to your most recent development version.
+
+Finally, you can clean up the cluster by destroying all the VMs:
+
+    vagrant destroy
+
+## Configuration ##
+
+You can override some default settings by specifying the values in
+`Vagrantfile.local`. It is interpreted as a Ruby file, although you'll probably
+only ever need to change a few simple configuration variables. Some values you
+might want to override:
+
+* `enable_dns` - Register each VM with a hostname in /etc/hosts on the
+  host. Hostnames are always set in the /etc/hosts in the VMs, so this is only
+  necessary if you want to address them conveniently from the host for tasks
+  that aren't provided by Vagrant.
+* `num_zookeepers` - Size of the ZooKeeper cluster
+* `num_brokers` - Number of broker instances to run
+
+
+## Using Other Providers ##
+
+### EC2 ###
+
+Install the `vagrant-aws` plugin to provide EC2 support:
+
+    $ vagrant plugin install vagrant-aws
+
+Next, configure parameters in `Vagrantfile.local`. A few are *required*:
+`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and
+`ec2_security_groups`. A couple of important notes:
+
+1. You definitely want to use `enable_dns` if you plan to run clients outside of
+   the cluster (e.g. from your local host). If you don't, you'll need to look
+   up the hosts' addresses with `vagrant ssh-config`.
+
+2. You'll have to set up a reasonable security group yourself. You'll need to
+   open ports for ZooKeeper (2888 & 3888 between ZK nodes, 2181 for clients) and
+   Kafka (9092). Beware that opening these ports to all sources (e.g. so you can
+   run producers/consumers locally) will allow anyone to access your Kafka
+   cluster. All other settings have reasonable defaults for setting up an
+   Ubuntu-based cluster, but you may want to customize instance type, region,
+   AMI, etc.
+
+3. `ec2_access_key` and `ec2_secret_key` will use the environment variables
+   `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` respectively if they are set and not
+   overridden in `Vagrantfile.local`.
+
+4. If you're launching into a VPC, you must specify `ec2_subnet_id` (the subnet
+   in which to launch the nodes) and `ec2_security_groups` must be a list of
+   security group IDs instead of names, e.g. `sg-34fd3551` instead of
+   `kafka-test-cluster`.
+
+Now start things up, but specify the aws provider:
+
+    $ vagrant up --provider=aws --no-parallel --no-provision && vagrant provision
+
+Your instances should get tagged with a name including your hostname to make
+them identifiable and make it easier to track instances in the AWS management
+console.
diff --git a/vagrant/base.sh b/vagrant/base.sh
new file mode 100644
index 0000000000000..6f28dfed67877
--- /dev/null
+++ b/vagrant/base.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base provisioning run on every VM: installs the Oracle JDK 7 when no javac
+# is on the PATH and links the synced /vagrant directory to /opt/kafka.
+
+set -e
+
+if ! command -v javac > /dev/null 2>&1; then
+  apt-get -y update
+  apt-get install -y software-properties-common python-software-properties
+  add-apt-repository -y ppa:webupd8team/java
+  apt-get -y update
+
+  # Try to share cache. See Vagrantfile for details
+  mkdir -p /var/cache/oracle-jdk7-installer
+  if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then
+    find /tmp/oracle-jdk7-installer-cache/ -type f -not -empty -exec cp '{}' /var/cache/oracle-jdk7-installer/ \;
+  fi
+
+  /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
+  apt-get -y install oracle-java7-installer oracle-java7-set-default
+
+  if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then
+    cp -R /var/cache/oracle-jdk7-installer/* /tmp/oracle-jdk7-installer-cache
+  fi
+fi
+
+chmod a+rw /opt
+if [ ! -e /opt/kafka ]; then
+  ln -s /vagrant /opt/kafka
+fi
diff --git a/vagrant/broker.sh b/vagrant/broker.sh
new file mode 100644
index 0000000000000..63f2d4f30c5a4
--- /dev/null
+++ b/vagrant/broker.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: broker.sh BROKER_ID PUBLIC_ADDRESS ZOOKEEPER_CONNECT
+#
+# Generates config/server-BROKER_ID.properties from the stock
+# server.properties and (re)starts the Kafka broker with it.
+
+set -e
+
+BROKER_ID=$1
+PUBLIC_ADDRESS=$2
+PUBLIC_ZOOKEEPER_ADDRESSES=$3
+
+cd /opt/kafka
+
+# The advertised.host.name rule replaces the whole commented-out line so that
+# any text following the '=' does not end up in the value.
+sed \
+  -e "s/broker.id=0/broker.id=$BROKER_ID/" \
+  -e "s/^#advertised.host.name=.*/advertised.host.name=$PUBLIC_ADDRESS/" \
+  -e "s/zookeeper.connect=localhost:2181/zookeeper.connect=$PUBLIC_ZOOKEEPER_ADDRESSES/" \
+  /opt/kafka/config/server.properties > "/opt/kafka/config/server-$BROKER_ID.properties"
+
+echo "Killing server"
+bin/kafka-server-stop.sh || true
+sleep 5 # Because kafka-server-stop.sh doesn't actually wait
+echo "Starting server"
+bin/kafka-server-start.sh "/opt/kafka/config/server-$BROKER_ID.properties" >> /tmp/broker.log 2>&1 &
diff --git a/vagrant/zk.sh b/vagrant/zk.sh
new file mode 100644
index 0000000000000..15517f826461d
--- /dev/null
+++ b/vagrant/zk.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: zk.sh ZK_ID NUM_ZOOKEEPERS
+#
+# Writes config/zookeeper-ZK_ID.properties listing servers zk1..zkN
+# (N = NUM_ZOOKEEPERS) and (re)starts ZooKeeper with it.
+
+set -e
+
+ZKID=$1
+NUM_ZK=$2
+
+cd /opt/kafka
+
+ZK_CONFIG="/opt/kafka/config/zookeeper-$ZKID.properties"
+cp /opt/kafka/config/zookeeper.properties "$ZK_CONFIG"
+{
+  echo "initLimit=5"
+  echo "syncLimit=2"
+  echo "quorumListenOnAllIPs=true"
+  for i in $(seq 1 "$NUM_ZK"); do
+    echo "server.${i}=zk${i}:2888:3888"
+  done
+} >> "$ZK_CONFIG"
+
+mkdir -p /tmp/zookeeper
+echo "$ZKID" > /tmp/zookeeper/myid
+
+echo "Killing ZooKeeper"
+bin/zookeeper-server-stop.sh || true
+sleep 5 # Because zookeeper-server-stop.sh doesn't actually wait
+echo "Starting ZooKeeper"
+bin/zookeeper-server-start.sh "$ZK_CONFIG" >> /tmp/zk.log 2>&1 &