# Fabric
Fabric is a Python<sup>1</sup> library which provides access to the command line. At Vokal it is used for a number of automated tasks, most notably in our
CI environment, where we use it to update staging instances after merging a branch into master. Historically, we have used Fabric to update the existing Docker image on an AWS instance,
then restart the relevant upstart service and run any necessary migrations. While this works fine, it does introduce downtime, which can be irritating for client developers. In an effort
to minimize downtime, I've provided an updated fabfile which:
- Starts up another instance
- Updates that instance with the new docker image
- Runs the service
- Bakes an AMI of that new instance
- Does a health check on the new instance
- If all that goes well, re-maps staging's elastic IP to the new instance and terminates the previous one.
import os
import time
from datetime import date
from base64 import b64decode
from fabric.api import *
from fabric.operations import *
import requests
from boto import ec2
def staging():
    """Configure Fabric's ``env`` for the staging host.

    Sets ``env.hosts`` to a one-element list holding ``env.STAGING_IP``,
    ``env.user`` to ``"ubuntu"`` and ``env.branch`` to ``"master"``.
    """
    staging_settings = (
        ("hosts", [env.STAGING_IP]),
        ("user", "ubuntu"),
        ("branch", "master"),
    )
    for key, value in staging_settings:
        setattr(env, key, value)
def get_git_hash():
    """Return the abbreviated hash of HEAD in the local git checkout.

    Runs ``git rev-parse --short HEAD`` through Fabric's ``local`` with
    ``capture=True`` and returns what that call gives back.
    """
    short_hash = local('git rev-parse --short HEAD', capture=True)
    return short_hash
def create_image(conn, instance_id):
    """Create an AMI from ``instance_id`` and return the new image id.

    The AMI is named ``<IMAGE_TAG>-<today's ISO date>-<short git hash>-<instance id>``
    and the request is made with ``no_reboot=False``.

    :param conn: EC2 connection object exposing ``create_image``.
    :param instance_id: id of the instance to image.
    :returns: whatever ``conn.create_image`` returns (the image id).
    """
    name_parts = (
        env.IMAGE_TAG,
        date.today().isoformat(),
        get_git_hash(),
        instance_id,
    )
    ami_name = '{0}-{1}-{2}-{3}'.format(*name_parts)
    ami_id = conn.create_image(instance_id, ami_name, no_reboot=False)
    print('Created AMI {0}'.format(ami_id))
    return ami_id
def get_instance(conn):
    """Return the instance that ``env.STAGING_IP`` is currently attached to.

    Looks up the address record for ``env.STAGING_IP``, then fetches the
    instance named by that record's ``instance_id``.

    :param conn: EC2 connection object.
    :raises AssertionError: unless exactly one address, one reservation and
        one instance are found.
    """
    addresses = conn.get_all_addresses([env.STAGING_IP])
    assert len(addresses) == 1
    staging_address = addresses[0]

    matches = conn.get_all_instances([staging_address.instance_id])
    assert len(matches) == 1
    reservation = matches[0]
    assert len(reservation.instances) == 1
    return reservation.instances[0]
def copy_instance(conn, inst):
    """Launch and return a new instance cloned from ``inst``.

    Steps:

    1. Find an AMI whose name ends in ``-<inst.id>``; if none exists, create
       one with ``create_image``.
    2. Poll the AMI every 5 seconds while it is ``pending``.
    3. Launch an instance from the AMI, reusing the original's security
       groups, user data, instance type and key name.
    4. Poll the new instance every 5 seconds while it is ``pending``.
    5. Try up to 12 times, 10 seconds apart, to run a command over SSH.

    :param conn: EC2 connection object.
    :param inst: the instance to copy.
    :raises AssertionError: if more than one matching AMI exists, the AMI
        does not become ``available``, the launch does not yield exactly one
        instance, the instance does not reach ``running``, or no SSH attempt
        succeeds.
    """
    # Reuse an AMI already baked for this machine; otherwise bake one now.
    existing = conn.get_all_images(filters={'name': '*-{}'.format(inst.id)})
    if existing:
        assert len(existing) == 1
        ami_id = existing[0].id
    else:
        ami_id = create_image(conn, inst.id)
    ami = conn.get_image(ami_id)

    # The AMI must be ready before an instance can be launched from it.
    while ami.state == 'pending':
        print('ami: {}'.format(ami.state))
        ami.update()
        time.sleep(5)
    assert ami.state == 'available'

    # Launch with the same configuration as the original instance.
    launch_options = dict(
        security_group_ids=[g.id for g in inst.groups],
        user_data=b64decode(inst.get_attribute('userData')['userData']),
        instance_type=inst.get_attribute('instanceType')['instanceType'],
        key_name=inst.key_name,
    )
    reservation = conn.run_instances(ami_id, **launch_options)
    assert len(reservation.instances) == 1
    clone = reservation.instances[0]

    # Wait until the new instance has left the pending state.
    while clone.state == 'pending':
        print('instance {}'.format(clone.state))
        clone.update()
        time.sleep(5)
    print(clone.state)
    assert clone.state == 'running'

    # Security groups are applied after the instance starts running, so keep
    # trying SSH until a command goes through.
    reachable = False
    ssh_target = 'ubuntu@{}'.format(clone.ip_address)
    with settings(host_string=ssh_target, warn_only=True):
        for _ in xrange(12):
            try:
                sudo('echo')
            except:
                # Bare except kept on purpose: any failure just means
                # "not reachable yet", so wait and retry.
                print('instance security group: pending')
                time.sleep(10)
            else:
                reachable = True
                break
    assert reachable
    return clone
def update_instance(inst):
    """Refresh the Docker image on ``inst`` and restart its service.

    Over SSH as ``ubuntu`` with ``warn_only=True``: stops the upstart service
    named by ``env.UPSTART_SERVICE_NAME``, removes all containers and dangling
    images, pulls ``env.DOCKER_IMAGE_NAME``, and starts the service again.
    Finally sleeps 5 seconds to let the container spin up.

    :param inst: instance to update; its ``ip_address`` is the SSH target.
    """
    ssh_target = 'ubuntu@{}'.format(inst.ip_address)
    with settings(host_string=ssh_target, warn_only=True):
        stop_cmd = "service {} stop".format(env.UPSTART_SERVICE_NAME)
        sudo(stop_cmd)

        # Clear out old containers and dangling images before pulling.
        sudo("docker rm $(sudo docker ps -aq)")
        sudo("docker rmi $(sudo docker images --filter dangling=true --quiet)")

        pull_cmd = "docker pull {}".format(env.DOCKER_IMAGE_NAME)
        sudo(pull_cmd)

        start_cmd = "service {} start".format(env.UPSTART_SERVICE_NAME)
        sudo(start_cmd)

    # Give the container a moment to spin up.
    time.sleep(5)
def health_check(ip, timeout=10):
    """Return True when the service at ``ip`` answers without a server error.

    Issues an HTTP GET to ``env.HEALTH_CHECK_FMT`` formatted with ``ip``.

    :param ip: value substituted into ``env.HEALTH_CHECK_FMT``.
    :param timeout: seconds to wait for the request before giving up.
    :returns: True if a response arrives with a status code below 500;
        False if the status is 500 or above, or if the connection fails or
        times out.
    """
    url = env.HEALTH_CHECK_FMT.format(ip)
    try:
        resp = requests.get(url, timeout=timeout)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        # A service that is not accepting connections yet is unhealthy, not
        # a fatal error: callers poll this function in a retry loop.
        return False
    return resp.status_code < 500
def remap_elastic_ip(conn, inst):
    """Associate the staging elastic IP with ``inst``.

    :param conn: EC2 connection object.
    :param inst: instance that should receive ``env.STAGING_IP``.
    :returns: the result of ``conn.associate_address``.
    """
    # The address lives on Fabric's env; a bare STAGING_IP name is undefined.
    return conn.associate_address(
        inst.id,
        public_ip=env.STAGING_IP,
        allow_reassociation=True)
def migrate():
    """Placeholder task for migrations and other additional operations.

    Requires ``hosts`` to be set in ``env`` (``staging`` provides it), then
    raises ``NotImplementedError`` because nothing is implemented yet.
    """
    host_providers = [staging]
    require("hosts", provided_by=host_providers)
    raise NotImplementedError()
def updates():
    """Replace the staging instance with an updated copy.

    updates assumes the presence of these variables passed in through drone.
    'STAGING_IP' which is the AWS's ELASTIC IP for staging
    'REGION' which is the region the instance is in.
    'AWS_ACCESS_KEY'
    'AWS_SECRET_KEY'
    'IMAGE_TAG' The AWS AMI tag name, which will prepend the current date, git-hash and instance_id
    'UPSTART_SERVICE_NAME' the name of the service on the instance
    'DOCKER_IMAGE_NAME' the name of the docker image.
    'DOCKER_CONTAINER_NAME' the name of the docker container
    'HEALTH_CHECK_FMT' url use in an HTTP GET request to check the health of the service.

    Flow: copy the current staging instance, update the copy, and check its
    health. If healthy, bake an AMI of the copy, re-check health, move the
    elastic IP to the copy and terminate the old instance. Otherwise
    terminate the copy and raise.

    :raises AssertionError: if a required variable is empty, the EC2
        connection cannot be made, the post-AMI health check never passes,
        or the elastic IP cannot be re-associated.
    :raises Exception: if the updated copy fails its first health check.
    """
    required = (
        'STAGING_IP',
        'REGION',
        'AWS_ACCESS_KEY',
        'AWS_SECRET_KEY',
        'IMAGE_TAG',
        'UPSTART_SERVICE_NAME',
        'DOCKER_IMAGE_NAME',
        'DOCKER_CONTAINER_NAME',
        'HEALTH_CHECK_FMT',
    )
    for name in required:
        assert getattr(env, name), 'missing required variable: ' + name

    conn = ec2.connect_to_region(
        env.REGION,
        aws_access_key_id=env.AWS_ACCESS_KEY,
        aws_secret_access_key=env.AWS_SECRET_KEY)
    assert conn

    inst = get_instance(conn)
    new_inst = copy_instance(conn, inst)
    update_instance(new_inst)

    # Decide up front whether the updated copy is worth baking and promoting.
    # The health URL is formatted with the instance's IP address, not its id.
    healthy = health_check(new_inst.ip_address)
    if healthy:
        create_image(conn, new_inst.id)
        time.sleep(30)  # give some time for new image to boot up.

        # Creating the image may have interrupted the service, so confirm
        # it is serving again before moving traffic over.
        healthy = False
        for _ in xrange(12):
            if health_check(new_inst.ip_address):
                healthy = True
                break
            time.sleep(10)
        assert healthy

        assert remap_elastic_ip(conn, new_inst)
        inst.terminate()
    else:
        new_inst.terminate()
        raise Exception("Health check failure, new instance terminated.")
After setting the environment variables, run `fab updates`. If any migrations or additional operations need to be run, they can be implemented in the `migrate`
function and run with
`fab staging migrate`.
<sup>1</sup> Currently Fabric is only available for Python 2.7.