KAFKA-1173 Using Vagrant to get up and running with Apache Kafka; patch by Ewen Cheslack-Postava, reviewed by Joe Stein
joestein committed Dec 5, 2014
1 parent 3cc10d5 commit 09e2fd6
Showing 7 changed files with 427 additions and 1 deletion.
7 changes: 6 additions & 1 deletion .gitignore
@@ -20,4 +20,9 @@ TAGS
.settings
.gradle
kafka.ipr
kafka.iws
.vagrant
Vagrantfile.local

config/server-*
config/zookeeper-*
4 changes: 4 additions & 0 deletions README.md
@@ -94,6 +94,10 @@ Please note for this to work you should create/update `~/.gradle/gradle.properties`
### Determining how transitive dependencies are added ###
./gradlew core:dependencies --configuration runtime

### Running in Vagrant ###

See [vagrant/README.md](vagrant/README.md).

### Contribution ###

Apache Kafka is interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html).
168 changes: 168 additions & 0 deletions Vagrantfile
@@ -0,0 +1,168 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- mode: ruby -*-
# vi: set ft=ruby :

require 'socket'

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

# General config
enable_dns = false
num_zookeepers = 1
num_brokers = 3
num_workers = 0 # Generic workers that get the code, but don't start any services
ram_megabytes = 1280

# EC2
ec2_access_key = ENV['AWS_ACCESS_KEY']
ec2_secret_key = ENV['AWS_SECRET_KEY']
ec2_keypair_name = nil
ec2_keypair_file = nil

ec2_region = "us-east-1"
ec2_az = nil # Use the default set by AWS
ec2_ami = "ami-9eaa1cf6"
ec2_instance_type = "m3.medium"
ec2_user = "ubuntu"
ec2_security_groups = nil
ec2_subnet_id = nil
# Only override this by setting it to false if you're running in a VPC and you
# are running Vagrant from within that VPC as well.
ec2_associate_public_ip = nil

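# Load user-defined overrides from Vagrantfile.local (ignored by git), letting
# any of the settings above be customized per machine. See vagrant/README.md.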
local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local")
if File.exist?(local_config_file)
eval(File.read(local_config_file), binding, "Vagrantfile.local")
end

# TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered.
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.hostmanager.enabled = true
config.hostmanager.manage_host = enable_dns
config.hostmanager.include_offline = false

## Provider-specific global configs
config.vm.provider :virtualbox do |vb,override|
override.vm.box = "ubuntu/trusty64"

override.hostmanager.ignore_private_ip = false

# Brokers started with the standard script currently set Xms and Xmx to 1G,
# plus we need some extra headroom.
vb.customize ["modifyvm", :id, "--memory", ram_megabytes.to_s]

if Vagrant.has_plugin?("vagrant-cachier")
config.cache.scope = :box
# Besides the defaults, we use a custom cache to handle the Oracle JDK
# download, which downloads via wget during an apt install. Because of the
# way the installer ends up using its cache directory, we need to jump
# through some hoops instead of just specifying a cache directly -- we
# share to a temporary location and the provisioning scripts symlink data
# to the right location.
config.cache.enable :generic, {
"oracle-jdk7" => { cache_dir: "/tmp/oracle-jdk7-installer-cache" },
}
end
end

config.vm.provider :aws do |aws,override|
# The "box" is specified as an AMI
override.vm.box = "dummy"
override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box"

override.hostmanager.ignore_private_ip = true

override.ssh.username = ec2_user
override.ssh.private_key_path = ec2_keypair_file

aws.access_key_id = ec2_access_key
aws.secret_access_key = ec2_secret_key
aws.keypair_name = ec2_keypair_name

aws.region = ec2_region
aws.availability_zone = ec2_az
aws.instance_type = ec2_instance_type
aws.ami = ec2_ami
aws.security_groups = ec2_security_groups
aws.subnet_id = ec2_subnet_id
# If a subnet is specified, default to turning on a public IP unless the
# user explicitly specifies the option. Without a public IP, Vagrant won't
# be able to SSH into the hosts unless Vagrant is also running in the VPC.
if ec2_associate_public_ip.nil?
aws.associate_public_ip = true unless ec2_subnet_id.nil?
else
aws.associate_public_ip = ec2_associate_public_ip
end

# Exclude some directories that can grow very large from syncing
config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['.git', 'core/data/', 'logs/', 'system_test/']
end

def name_node(node, name)
node.vm.hostname = name
node.vm.provider :aws do |aws|
aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name }
end
end

def assign_local_ip(node, ip_address)
node.vm.provider :virtualbox do |vb,override|
override.vm.network :private_network, ip: ip_address
end
end

## Cluster definition
zookeepers = []
(1..num_zookeepers).each { |i|
name = "zk" + i.to_s
zookeepers.push(name)
config.vm.define name do |zookeeper|
name_node(zookeeper, name)
ip_address = "192.168.50." + (10 + i).to_s
assign_local_ip(zookeeper, ip_address)
zookeeper.vm.provision "shell", path: "vagrant/base.sh"
zookeeper.vm.provision "shell", path: "vagrant/zk.sh", :args => [i.to_s, num_zookeepers]
end
}

(1..num_brokers).each { |i|
name = "broker" + i.to_s
config.vm.define name do |broker|
name_node(broker, name)
ip_address = "192.168.50." + (50 + i).to_s
assign_local_ip(broker, ip_address)
# We need to be careful about what we list as the publicly routable
# address since this is registered in ZK and handed out to clients. If
# host DNS isn't set up, we shouldn't use hostnames -- IP addresses must be
# used to support clients running on the host.
zookeeper_connect = zookeepers.map{ |zk_addr| zk_addr + ":2181"}.join(",")
broker.vm.provision "shell", path: "vagrant/base.sh"
broker.vm.provision "shell", path: "vagrant/broker.sh", :args => [i.to_s, enable_dns ? name : ip_address, zookeeper_connect]
end
}

(1..num_workers).each { |i|
name = "worker" + i.to_s
config.vm.define name do |worker|
name_node(worker, name)
ip_address = "192.168.50." + (100 + i).to_s
assign_local_ip(worker, ip_address)
worker.vm.provision "shell", path: "vagrant/base.sh"
end
}

end
126 changes: 126 additions & 0 deletions vagrant/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Apache Kafka #

Using Vagrant to get up and running.

1) Install VirtualBox [https://www.virtualbox.org/](https://www.virtualbox.org/)
2) Install Vagrant >= 1.6.4 [http://www.vagrantup.com/](http://www.vagrantup.com/)
3) Install Vagrant plugins:

# Required
$ vagrant plugin install vagrant-hostmanager
# Optional
$ vagrant plugin install vagrant-cachier # Caches & shares package downloads across VMs

In the main Kafka folder, do a normal Kafka build:

$ gradle
$ ./gradlew jar

You can override default settings in `Vagrantfile.local`, which is a Ruby file
that is ignored by git and imported into the Vagrantfile.
One setting you will likely want to enable in `Vagrantfile.local` is
`enable_dns = true`, which puts hostnames in the host's /etc/hosts file so
that you can avoid using raw IP addresses when addressing the cluster from
outside the VMs, e.g. if you run a client on the host. It's disabled by
default since it requires `sudo` access, mucks with your system state, and
breaks with naming conflicts if you try to run multiple clusters concurrently.
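
For example, a minimal `Vagrantfile.local` might contain just that setting:

    # Vagrantfile.local
    enable_dns = true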

Now bring up the cluster:

$ vagrant up --no-provision && vagrant provision

We separate the two steps (bringing up the base VMs and configuring them)
because a current ZooKeeper limitation (ZOOKEEPER-1506) requires us to collect
the IPs of all nodes before starting any ZooKeeper node.

Once this completes:

* ZooKeeper will be running on 192.168.50.11 (and `zk1` if you enabled `enable_dns`)
* Broker 1 on 192.168.50.51 (and `broker1` if you enabled `enable_dns`)
* Broker 2 on 192.168.50.52 (and `broker2` if you enabled `enable_dns`)
* Broker 3 on 192.168.50.53 (and `broker3` if you enabled `enable_dns`)

To log into one of the machines:

vagrant ssh <machineName>
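
For example, to get a shell on the first broker:

    vagrant ssh broker1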

You can access the brokers and ZooKeeper nodes by IP address or hostname, e.g.

# Specify ZooKeeper node 1 by its IP: 192.168.50.11
bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox

# Specify brokers by their hostnames: broker1, broker2, broker3
bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox

# Specify ZooKeeper node by its hostname: zk1
bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning

If you need to update the running cluster, you can re-run the provisioner (the
step that installs software and configures services):

vagrant provision

Note that this doesn't currently ensure a fresh start -- old cluster state will
remain intact after everything restarts. This can be useful for updating the
cluster to your most recent development version.

Finally, you can clean up the cluster by destroying all the VMs:

vagrant destroy

## Configuration ##

You can override some default settings by specifying the values in
`Vagrantfile.local`. It is interpreted as a Ruby file, although you'll probably
only ever need to change a few simple configuration variables. Some values you
might want to override (a sketch follows this list):

* `enable_dns` - Register each VM with a hostname in /etc/hosts on the
host. Hostnames are always set in /etc/hosts inside the VMs, so this is only
necessary if you want to address the VMs conveniently from the host for tasks
that Vagrant doesn't provide.
* `num_zookeepers` - Size of the ZooKeeper cluster
* `num_brokers` - Number of broker instances to run
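
As a sketch, a `Vagrantfile.local` that grows the cluster (the sizes here are
arbitrary) could look like:

    # Vagrantfile.local
    num_zookeepers = 3
    num_brokers = 5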


## Using Other Providers ##

### EC2 ###

Install the `vagrant-aws` plugin to provide EC2 support:

$ vagrant plugin install vagrant-aws

Next, configure parameters in `Vagrantfile.local`. A few are *required*:
`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and
`ec2_security_groups`. A few important notes (a sketch of such a config follows these notes):

1. You definitely want to use `enable_dns` if you plan to run clients outside of
the cluster (e.g. from your local host). If you don't, you'll need to look up
each host's address with `vagrant ssh-config`.

2. You'll have to set up a reasonable security group yourself. You'll need to
open ports for ZooKeeper (2888 & 3888 between ZK nodes, 2181 for clients) and
Kafka (9092). Beware that opening these ports to all sources (e.g. so you can
run producers/consumers locally) will allow anyone to access your Kafka
cluster. All other settings have reasonable defaults for setting up an
Ubuntu-based cluster, but you may want to customize instance type, region,
AMI, etc.

3. `ec2_access_key` and `ec2_secret_key` will use the environment variables
`AWS_ACCESS_KEY` and `AWS_SECRET_KEY` respectively if they are set and not
overridden in `Vagrantfile.local`.

4. If you're launching into a VPC, you must specify `ec2_subnet_id` (the subnet
in which to launch the nodes) and `ec2_security_groups` must be a list of
security group IDs instead of names, e.g. `sg-34fd3551` instead of
`kafka-test-cluster`.
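
As a sketch, a `Vagrantfile.local` for EC2 under these notes (the key pair and
security group names below are hypothetical) might look like:

    # Vagrantfile.local
    enable_dns = true
    ec2_keypair_name = "kafka-test"            # hypothetical key pair name
    ec2_keypair_file = "~/.ssh/kafka-test.pem" # hypothetical path to its private key
    ec2_security_groups = ["kafka-test"]
    # ec2_access_key and ec2_secret_key are omitted here; they fall back to the
    # AWS_ACCESS_KEY and AWS_SECRET_KEY environment variables (see note 3).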

Now start things up, but specify the aws provider:

$ vagrant up --provider=aws --no-parallel --no-provision && vagrant provision

Your instances should get tagged with a name that includes your hostname,
making them identifiable and easier to track in the AWS management console.
43 changes: 43 additions & 0 deletions vagrant/base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

if [ -z "$(which javac)" ]; then
apt-get -y update
apt-get install -y software-properties-common python-software-properties
add-apt-repository -y ppa:webupd8team/java
apt-get -y update

# Try to share cache. See Vagrantfile for details
mkdir -p /var/cache/oracle-jdk7-installer
if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then
find /tmp/oracle-jdk7-installer-cache/ -not -empty -exec cp '{}' /var/cache/oracle-jdk7-installer/ \;
fi

/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
apt-get -y install oracle-java7-installer oracle-java7-set-default

if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then
cp -R /var/cache/oracle-jdk7-installer/* /tmp/oracle-jdk7-installer-cache
fi
fi

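# Expose the Kafka source tree, which Vagrant syncs to /vagrant, at a stable path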
chmod a+rw /opt
if [ ! -e /opt/kafka ]; then
ln -s /vagrant /opt/kafka
fi
(Diffs for the remaining two changed files -- presumably vagrant/broker.sh and
vagrant/zk.sh, which the Vagrantfile provisions with -- did not load on this page.)
