From 180b7e71c4197586cb1266e034813298f10db6ef Mon Sep 17 00:00:00 2001 From: kayrus Date: Thu, 11 Feb 2016 17:22:39 +0100 Subject: [PATCH 01/26] test: added scripts for functional tests --- build | 35 ++----- build-docker | 4 +- build-env | 23 +++++ functional/.gitignore | 1 + functional/README.md | 107 +++++++++++++++------ functional/Vagrantfile | 146 +++++++++++++++++++++++++++++ functional/clean.sh | 2 + functional/config.rb | 58 ++++++++++++ functional/platform/nspawn.go | 17 ++-- functional/provision/install_go.sh | 16 ++++ functional/run-in-vagrant | 7 ++ functional/start_etcd | 25 +++++ functional/test | 49 ++++++++++ functional/user-data | 12 +++ 14 files changed, 439 insertions(+), 63 deletions(-) create mode 100755 build-env create mode 100644 functional/.gitignore create mode 100644 functional/Vagrantfile create mode 100644 functional/config.rb create mode 100755 functional/provision/install_go.sh create mode 100755 functional/run-in-vagrant create mode 100755 functional/start_etcd create mode 100755 functional/test create mode 100644 functional/user-data diff --git a/build b/build index 7ac5497d1..3c32a330b 100755 --- a/build +++ b/build @@ -1,41 +1,24 @@ #!/bin/bash -e -# The -X format changed from go1.4 -> go1.5 -function go_linker_dashX { - local version=$(go version) - local regex="go([0-9]+).([0-9]+)." - if [[ $version =~ $regex ]]; then - if [ ${BASH_REMATCH[1]} -eq "1" -a ${BASH_REMATCH[2]} -le "4" ]; then - echo "$1 \"$2\"" - else - echo "$1=$2" - fi - else - echo "could not determine Go version" - exit 1 - fi -} +CDIR=$(cd `dirname $0` && pwd) +cd $CDIR ORG_PATH="github.com/coreos" REPO_PATH="${ORG_PATH}/fleet" VERSION=$(git describe --dirty) -GLDFLAGS="-X $(go_linker_dashX github.com/coreos/fleet/version.Version ${VERSION})" + +source build-env if [ ! -h gopath/src/${REPO_PATH} ]; then - mkdir -p gopath/src/${ORG_PATH} - ln -s ../../../.. gopath/src/${REPO_PATH} || exit 255 + mkdir -p gopath/src/${ORG_PATH} + ln -s ../../../.. gopath/src/${REPO_PATH} || exit 255 fi -export GOBIN=${PWD}/bin -export GOPATH=${PWD}/gopath - -eval $(go env) - if [ ${GOOS} = "linux" ]; then - echo "Building fleetd..." - CGO_ENABLED=0 go build -o bin/fleetd -a -installsuffix netgo -ldflags "${GLDFLAGS}" ${REPO_PATH}/fleetd + echo "Building fleetd..." + CGO_ENABLED=0 go build -o bin/fleetd -a -installsuffix netgo -ldflags "${GLDFLAGS}" ${REPO_PATH}/fleetd else - echo "Not on Linux - skipping fleetd build" + echo "Not on Linux - skipping fleetd build" fi echo "Building fleetctl..." diff --git a/build-docker b/build-docker index f120c2f77..cfc926389 100755 --- a/build-docker +++ b/build-docker @@ -1,3 +1,5 @@ #!/bin/bash -e -docker run --rm -v $PWD:/opt/fleet -u $(id -u):$(id -g) google/golang:1.4 /bin/bash -c "cd /opt/fleet && ./build" +CDIR=$(cd `dirname $0` && pwd) + +docker run --rm -v $CDIR:/opt/fleet -u $(id -u):$(id -g) google/golang:1.4 /bin/bash -c "cd /opt/fleet && ./build" diff --git a/build-env b/build-env new file mode 100755 index 000000000..9fa387f3c --- /dev/null +++ b/build-env @@ -0,0 +1,23 @@ +# The -X format changed from go1.4 -> go1.5 +function go_linker_dashX { + local version=$(go version) + local regex="go([0-9]+).([0-9]+)." + if [[ $version =~ $regex ]]; then + if [ ${BASH_REMATCH[1]} -eq "1" -a ${BASH_REMATCH[2]} -le "4" ]; then + echo "$1 \"$2\"" + else + echo "$1=$2" + fi + else + echo "could not determine Go version" + exit 1 + fi +} + +export GOBIN=${PWD}/bin +export GOPATH=${PWD}/gopath +export GLDFLAGS="-X $(go_linker_dashX github.com/coreos/fleet/version.Version ${VERSION})" +eval $(go env) +export PATH="${GOROOT}/bin:${PATH}" +export FLEETD_BIN="$(pwd)/bin/fleetd" +export FLEETCTL_BIN="$(pwd)/bin/fleetctl" diff --git a/functional/.gitignore b/functional/.gitignore new file mode 100644 index 000000000..a977916f6 --- /dev/null +++ b/functional/.gitignore @@ -0,0 +1 @@ +.vagrant/ diff --git a/functional/README.md b/functional/README.md index 815400d0e..b36aeb72f 100644 --- a/functional/README.md +++ b/functional/README.md @@ -2,52 +2,101 @@ This functional test suite deploys a fleet cluster using nspawn containers, and asserts fleet is functioning properly. -It shares an instance of etcd deployed on the host machine with each of the nspawn containers. +It shares an instance of etcd deployed on the host machine with each of the nspawn containers which use `172.18.0.1/16` network, so please make sure this network does not intersect with others. -It's recommended to run this in a virtual machine environment on CoreOS (e.g. using coreos-vagrant). The only dependency for the tests not provided on the CoreOS image is `go`. +It's recommended to run this in a virtual machine environment on CoreOS (e.g. using [Vagrant][test-in-vagrant]). -The caller must do three things before running the tests: +Since the tests utilize [`systemd-nspawn`][systemd-nspawn], this needs to be invoked as sudo/root. -1. Ensure an ssh-agent is running and the functional-testing identity is loaded. The `SSH_AUTH_SOCK` environment variable must be set. +If the tests are aborted partway through, it's currently possible for them to leave residual state as a result of the `systemd-nspawn` operations. This can be cleaned up using the `clean.sh` script. -``` -$ ssh-agent -$ ssh-add fleet/functional/fixtures/id_rsa -$ echo $SSH_AUTH_SOCK -/tmp/ssh-kwmtTOsL7978/agent.7978 -``` -2. Ensure the `FLEETD_BIN` and `FLEETCTL_BIN` environment variables point to the respective fleetd and fleetctl binaries that should be used to drive the actual tests. +### Run tests in Vagrant + +The recommended way to run the tests is to use the provided Vagrantfile, which will set up a single CoreOS instance with a one-member etcd cluster (configuration is applied using `user-data` [Cloud-Config][cloud-config] file located in this directory). +To do so, simply run the following commands on a system with Vagrant installed (see [Vagrant configuration][configure-vagrant] section of this doc) +```sh +$ git clone https://github.com/coreos/fleet +$ cd fleet/functional +$ ./run-in-vagrant ``` -$ export FLEETD_BIN=/path/to/fleetd -$ export FLEETCTL_BIN=/path/to/fleetctl + +Vagrant's provision step includes go binaries download using `functional/provision/install_go.sh` script. + +### Run tests inside other CoreOS platforms (QEMU/BareMetal/libvirt/etc) + +It's also possible to run the tests on CoreOS on other platforms. The following commands should be run *inside* the CoreOS instance. + +```sh +$ git clone https://github.com/coreos/fleet ``` -3. Make sure etcd is running on the host system. +If you didn't configure etcd2 daemon yet, just run this script: +```sh +$ sudo fleet/functional/start_etcd ``` -$ systemctl start etcd + +It will configure and start a one-member etcd cluster. + +Then run the functional tests (script will download and unpack golang into home directory): + +```sh +$ sudo fleet/functional/test ``` -Then the tests can be run with: +When `fleet/functional/test` can not find go binaries, it will download them automatically using `functional/provision/install_go.sh` script. +## Configure host environment to run Vagrant + +### Debian/Ubuntu + +#### Install Vagrant + +```sh +sudo apt-get install -y git nfs-kernel-server +wget https://releases.hashicorp.com/vagrant/1.8.1/vagrant_1.8.1_x86_64.deb +sudo dpkg -i vagrant_1.8.1_x86_64.deb ``` -# go test github.com/coreos/fleet/functional + +#### Install VirtualBox + +```sh +echo "deb http://download.virtualbox.org/virtualbox/debian $(lsb_release -sc) contrib" | sudo tee /etc/apt/sources.list.d/virtualbox.list +wget -q https://www.virtualbox.org/download/oracle_vbox.asc -O- | sudo apt-key add - +sudo apt-get update +sudo apt-get install -y build-essential dkms +sudo apt-get install -y VirtualBox-5.0 +#Previous VirtualBox (if you have problems with nested virtualization, more info here: https://www.virtualbox.org/ticket/14965) +#sudo apt-get install -y VirtualBox-4.3 ``` -Since the tests utilize `systemd-nspawn`, this needs to be invoked as sudo/root. +### CentOS/Fedora -An example test session using coreos-vagrant follows. This assumes that go is available in `/home/core/go` and the fleet repository in `/home/core/fleet` on the target machine (the easiest way to achieve this is to use shared folders). +**NOTE**: NFS and Vagrant doesn't work out of the box on CentOS 6.x, so it is recommended to use CentOS 7.x + +#### Install Vagrant + +```sh +sudo yum install -y git nfs-utils +sudo service nfs start +sudo yum install -y https://releases.hashicorp.com/vagrant/1.8.1/vagrant_1.8.1_x86_64.rpm ``` -vagrant ssh core-01 -- -A -export GOROOT="$(pwd)/go" -export PATH="${GOROOT}/bin:$PATH" -cd fleet -ssh-add functional/fixtures/id_rsa -export GOPATH="$(pwd)/gopath" -export FLEETD_BIN="$(pwd)/bin/fleetd" -export FLEETCTL_BIN="$(pwd)/bin/fleetctl" -sudo -E env PATH=$PATH go test github.com/coreos/fleet/functional -v + +#### Install VirtualBox + +```sh +source /etc/os-release +for id in $ID_LIKE $ID; do break; done +OS_ID=${id:-rhel} +curl http://download.virtualbox.org/virtualbox/rpm/$OS_ID/virtualbox.repo | sudo tee /etc/yum.repos.d/virtualbox.repo +sudo yum install -y make automake gcc gcc-c++ kernel-devel-`uname -r` dkms +sudo yum install -y VirtualBox-5.0 +#Previous VirtualBox (if you have problems with nested virtualization, more info here: https://www.virtualbox.org/ticket/14965) +#sudo yum install -y VirtualBox-4.3 ``` -If the tests are aborted partway through, it's currently possible for them to leave residual state as a result of the systemd-nspawn operations. This can be cleaned up using the `clean.sh` script. +[test-in-vagrant]: #run-tests-in-vagrant +[configure-vagrant]: #configure-host-environment-to-run-vagrant +[systemd-nspawn]: https://www.freedesktop.org/software/systemd/man/systemd-nspawn.html +[cloud-config]: https://github.com/coreos/coreos-cloudinit/blob/master/Documentation/cloud-config.md diff --git a/functional/Vagrantfile b/functional/Vagrantfile new file mode 100644 index 000000000..b2b71a66f --- /dev/null +++ b/functional/Vagrantfile @@ -0,0 +1,146 @@ +# -*- mode: ruby -*- +# # vi: set ft=ruby : +# Vagrantfile based on official CoreOS Vagranfile https://github.com/coreos/coreos-vagrant with one extra provision string + +require 'fileutils' + +Vagrant.require_version ">= 1.6.0" + +CLOUD_CONFIG_PATH = File.join(File.dirname(__FILE__), "user-data") +CONFIG = File.join(File.dirname(__FILE__), "config.rb") + +# Defaults for config options defined in CONFIG +$num_instances = 1 +$instance_name_prefix = "core" +$update_channel = "alpha" +$image_version = "current" +$enable_serial_logging = false +$share_home = false +$vm_gui = false +$vm_memory = 1024 +$vm_cpus = 1 +$shared_folders = {} +$forwarded_ports = {} + +# Attempt to apply the deprecated environment variable NUM_INSTANCES to +# $num_instances while allowing config.rb to override it +if ENV["NUM_INSTANCES"].to_i > 0 && ENV["NUM_INSTANCES"] + $num_instances = ENV["NUM_INSTANCES"].to_i +end + +if File.exist?(CONFIG) + require CONFIG +end + +# Use old vb_xxx config variables when set +def vm_gui + $vb_gui.nil? ? $vm_gui : $vb_gui +end + +def vm_memory + $vb_memory.nil? ? $vm_memory : $vb_memory +end + +def vm_cpus + $vb_cpus.nil? ? $vm_cpus : $vb_cpus +end + +Vagrant.configure("2") do |config| + # always use Vagrants insecure key + config.ssh.insert_key = false + + config.vm.box = "coreos-%s" % $update_channel + if $image_version != "current" + config.vm.box_version = $image_version + end + config.vm.box_url = "https://storage.googleapis.com/%s.release.core-os.net/amd64-usr/%s/coreos_production_vagrant.json" % [$update_channel, $image_version] + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v, override| + override.vm.box_url = "https://storage.googleapis.com/%s.release.core-os.net/amd64-usr/%s/coreos_production_vagrant_vmware_fusion.json" % [$update_channel, $image_version] + end + end + + config.vm.provider :virtualbox do |v| + # On VirtualBox, we don't have guest additions or a functional vboxsf + # in CoreOS, so tell Vagrant that so it can be smarter. + v.check_guest_additions = false + v.functional_vboxsf = false + end + + # plugin conflict + if Vagrant.has_plugin?("vagrant-vbguest") then + config.vbguest.auto_update = false + end + + (1..$num_instances).each do |i| + config.vm.define vm_name = "%s-%02d" % [$instance_name_prefix, i] do |config| + config.vm.hostname = vm_name + + if $enable_serial_logging + logdir = File.join(File.dirname(__FILE__), "log") + FileUtils.mkdir_p(logdir) + + serialFile = File.join(logdir, "%s-serial.txt" % vm_name) + FileUtils.touch(serialFile) + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v, override| + v.vmx["serial0.present"] = "TRUE" + v.vmx["serial0.fileType"] = "file" + v.vmx["serial0.fileName"] = serialFile + v.vmx["serial0.tryNoRxLoss"] = "FALSE" + end + end + + config.vm.provider :virtualbox do |vb, override| + vb.customize ["modifyvm", :id, "--uart1", "0x3F8", "4"] + vb.customize ["modifyvm", :id, "--uartmode1", serialFile] + end + end + + if $expose_docker_tcp + config.vm.network "forwarded_port", guest: 2375, host: ($expose_docker_tcp + i - 1), auto_correct: true + end + + $forwarded_ports.each do |guest, host| + config.vm.network "forwarded_port", guest: guest, host: host, auto_correct: true + end + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v| + v.gui = vm_gui + v.vmx['memsize'] = vm_memory + v.vmx['numvcpus'] = vm_cpus + end + end + + config.vm.provider :virtualbox do |vb| + vb.gui = vm_gui + vb.memory = vm_memory + vb.cpus = vm_cpus + end + + ip = "172.17.8.#{i+100}" + config.vm.network :private_network, ip: ip + + # Uncomment below to enable NFS for sharing the host machine into the coreos-vagrant VM. + #config.vm.synced_folder ".", "/home/core/share", id: "core", :nfs => true, :mount_options => ['nolock,vers=3,udp'] + $shared_folders.each_with_index do |(host_folder, guest_folder), index| + config.vm.synced_folder host_folder.to_s, guest_folder.to_s, id: "core-share%02d" % index, nfs: true, mount_options: ['nolock,vers=3,udp'] + end + + if $share_home + config.vm.synced_folder ENV['HOME'], ENV['HOME'], id: "home", :nfs => true, :mount_options => ['nolock,vers=3,udp'] + end + + if File.exist?(CLOUD_CONFIG_PATH) + config.vm.provision :file, :source => "#{CLOUD_CONFIG_PATH}", :destination => "/tmp/vagrantfile-user-data" + config.vm.provision :shell, :inline => "mv /tmp/vagrantfile-user-data /var/lib/coreos-vagrant/", :privileged => true + end + + config.vm.provision :shell, :path => "provision/install_go.sh", :privileged => false + + end + end +end diff --git a/functional/clean.sh b/functional/clean.sh index 0b3f25e0d..f86b75524 100755 --- a/functional/clean.sh +++ b/functional/clean.sh @@ -9,3 +9,5 @@ sudo rm -fr /run/systemd/system/*smoke* /tmp/smoke sudo systemctl daemon-reload ip link show fleet0 &>/dev/null && sudo ip link del fleet0 etcdctl rm --recursive /fleet_functional + +rm -f log diff --git a/functional/config.rb b/functional/config.rb new file mode 100644 index 000000000..27ae745dd --- /dev/null +++ b/functional/config.rb @@ -0,0 +1,58 @@ +# Size of the CoreOS cluster created by Vagrant +$num_instances=1 + +# coreos-vagrant is configured through a series of configuration +# options (global ruby variables) which are detailed below. To modify +# these options, first copy this file to "config.rb". Then simply +# uncomment the necessary lines, leaving the $, and replace everything +# after the equals sign.. + +# Change basename of the VM +# The default value is "core", which results in VMs named starting with +# "core-01" through to "core-${num_instances}". +#$instance_name_prefix="core" + +# Change the version of CoreOS to be installed +# To deploy a specific version, simply set $image_version accordingly. +# For example, to deploy version 709.0.0, set $image_version="709.0.0". +# The default value is "current", which points to the current version +# of the selected channel +#$image_version = "current" + +# Official CoreOS channel from which updates should be downloaded +$update_channel='stable' + +# Log the serial consoles of CoreOS VMs to log/ +# Enable by setting value to true, disable with false +# WARNING: Serial logging is known to result in extremely high CPU usage with +# VirtualBox, so should only be used in debugging situations +#$enable_serial_logging=false + +# Enable port forwarding of Docker TCP socket +# Set to the TCP port you want exposed on the *host* machine, default is 2375 +# If 2375 is used, Vagrant will auto-increment (e.g. in the case of $num_instances > 1) +# You can then use the docker tool locally by setting the following env var: +# export DOCKER_HOST='tcp://127.0.0.1:2375' +#$expose_docker_tcp=2375 + +# Enable NFS sharing of your home directory ($HOME) to CoreOS +# It will be mounted at the same path in the VM as on the host. +# Example: /Users/foobar -> /Users/foobar +#$share_home=false + +# Customize VMs +#$vm_gui = false +$vm_memory = 512 +$vm_cpus = 1 + +# Share additional folders to the CoreOS VMs +# For example, +# $shared_folders = {'/path/on/host' => '/path/on/guest', '/home/foo/app' => '/app'} +# or, to map host folders to guest folders of the same name, +# $shared_folders = Hash[*['/home/foo/app1', '/home/foo/app2'].map{|d| [d, d]}.flatten] +#$shared_folders = {} + +$shared_folders = {'../' => '/home/core/fleet'} + +# Enable port forwarding from guest(s) to host machine, syntax is: { 80 => 8080 }, auto correction is enabled by default. +#$forwarded_ports = {} diff --git a/functional/platform/nspawn.go b/functional/platform/nspawn.go index d642fd2d6..0b461d416 100644 --- a/functional/platform/nspawn.go +++ b/functional/platform/nspawn.go @@ -211,10 +211,10 @@ func (nc *nspawnCluster) prepCluster() (err error) { return } - if !strings.Contains(stdout, "172.17.0.1/16") { - _, _, err = run("ip addr add 172.17.0.1/16 dev fleet0") + if !strings.Contains(stdout, "172.18.0.1/16") { + _, _, err = run("ip addr add 172.18.0.1/16 dev fleet0") if err != nil { - log.Printf("Failed adding 172.17.0.1/16 to fleet0: %v", err) + log.Printf("Failed adding 172.18.0.1/16 to fleet0: %v", err) return } } @@ -251,7 +251,7 @@ func (nc *nspawnCluster) buildConfigDrive(dir, ip string) error { } defer userFile.Close() - etcd := "http://172.17.0.1:4001" + etcd := "http://172.18.0.1:4001" return util.BuildCloudConfig(userFile, ip, etcd, nc.keyspace()) } @@ -290,7 +290,7 @@ func (nc *nspawnCluster) createMember(id string) (m Member, err error) { nm := nspawnMember{ uuid: newMachineID(), id: id, - ip: fmt.Sprintf("172.17.1.%s", id), + ip: fmt.Sprintf("172.18.1.%s", id), } nc.members[nm.ID()] = nm @@ -303,13 +303,15 @@ func (nc *nspawnCluster) createMember(id string) (m Member, err error) { // minimum requirements for running systemd/coreos in a container fmt.Sprintf("mkdir -p %s/usr", fsdir), fmt.Sprintf("cp /etc/os-release %s/etc", fsdir), + fmt.Sprintf("echo 'core:x:500:500:CoreOS Admin:/home/core:/bin/bash' > %s/etc/passwd", fsdir), + fmt.Sprintf("echo 'core:x:500:' > %s/etc/group", fsdir), fmt.Sprintf("ln -s /proc/self/mounts %s/etc/mtab", fsdir), fmt.Sprintf("ln -s usr/lib64 %s/lib64", fsdir), fmt.Sprintf("ln -s lib64 %s/lib", fsdir), fmt.Sprintf("ln -s usr/bin %s/bin", fsdir), fmt.Sprintf("ln -s usr/sbin %s/sbin", fsdir), fmt.Sprintf("mkdir -p %s/home/core/.ssh", fsdir), - fmt.Sprintf("chown -R core:core %s/home/core", fsdir), + fmt.Sprintf("chown -R 500:500 %s/home/core", fsdir), // We don't need this, and it's slow, so mask it fmt.Sprintf("ln -s /dev/null %s/etc/systemd/system/systemd-udev-hwdb-update.service", fsdir), @@ -346,7 +348,7 @@ UseDNS no [Service] Type=oneshot RemainAfterExit=yes - ExecStart=/usr/bin/ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" -b 768` + ExecStart=/usr/bin/ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" -b 1024` if err = ioutil.WriteFile(path.Join(fsdir, "/etc/systemd/system/sshd-keygen.service"), []byte(sshd_keygen), 0644); err != nil { log.Printf("Failed writing sshd-keygen.service: %v", err) return @@ -395,6 +397,7 @@ UseDNS no return default: } + log.Printf("Dialing machine: %s", addr) c, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) if err == nil { c.Close() diff --git a/functional/provision/install_go.sh b/functional/provision/install_go.sh new file mode 100755 index 000000000..0e58cb52f --- /dev/null +++ b/functional/provision/install_go.sh @@ -0,0 +1,16 @@ +#!/bin/bash -e + +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +export GOROOT=${HOME}/go +export PATH=${HOME}/go/bin:${PATH} + +gover=1.5.3 +gotar=go${gover}.linux-amd64.tar.gz +if [ ! -f ${HOME}/${gotar} ]; then + # Remove unfinished archive when you press Ctrl+C + trap "rm -f ${HOME}/${gotar}" INT TERM + wget --no-verbose https://storage.googleapis.com/golang/${gotar} -P ${HOME} +fi +tar -xf ${HOME}/${gotar} -C ${HOME} diff --git a/functional/run-in-vagrant b/functional/run-in-vagrant new file mode 100755 index 000000000..d528c6d27 --- /dev/null +++ b/functional/run-in-vagrant @@ -0,0 +1,7 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +cd $CDIR + +vagrant up +vagrant ssh core-01 -c "sudo ~/fleet/functional/test" diff --git a/functional/start_etcd b/functional/start_etcd new file mode 100755 index 000000000..84b853623 --- /dev/null +++ b/functional/start_etcd @@ -0,0 +1,25 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +if [[ -z "${SUDO_UID}" && "${USER_ID}" != "0" ]]; then + echo "Script should be run using sudo" + exit 1 +fi + +if [ ! -f ${HOME}/setup-network-environment ]; then + # Remove unfinished file when you press Ctrl+C + trap "rm -f ${HOME}/setup-network-environment" INT TERM + wget --no-verbose https://github.com/kelseyhightower/setup-network-environment/releases/download/1.0.1/setup-network-environment -P ${HOME} +fi + +if [ ! -x ${HOME}/setup-network-environment ]; then + chmod +x ${HOME}/setup-network-environment +fi + +${HOME}/setup-network-environment +source /etc/network-environment +export COREOS_PRIVATE_IPV4=$DEFAULT_IPV4 +coreos-cloudinit --from-file=${CDIR}/user-data diff --git a/functional/test b/functional/test new file mode 100755 index 000000000..0ccb464e0 --- /dev/null +++ b/functional/test @@ -0,0 +1,49 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +cd ${CDIR}/../ +export VERSION=$(git describe --dirty) +export GOROOT=${HOME}/go +export PATH=${HOME}/go/bin:${PATH} + +if [ ! -S "$SSH_AUTH_SOCK" ]; then + eval $(ssh-agent) +fi + +# github doesn't support explicit file permission set, this is workaround +chmod 0600 functional/fixtures/id_rsa +ssh-add functional/fixtures/id_rsa +sudo systemctl stop fleet || true + +if [[ ! $(go version 2>/dev/null) ]]; then + functional/provision/install_go.sh +fi + +if [ ! -x "bin/fleetd" ] || \ + [ ! -x "bin/fleetctl" ] || \ + [ ! $(bin/fleetctl | grep "$VERSION") ]; then + ./build +fi + +source build-env +eval $(go env) +go test github.com/coreos/fleet/functional -ldflags "${GLDFLAGS}" -v 2>&1 | tee functional/log + +total=$(grep -E '^--- (PASS|FAIL)' functional/log | wc -l) +pass=$(grep '^--- PASS' functional/log | wc -l) +fail=$(grep '^--- FAIL' functional/log | wc -l) + +echo "" +grep -E '^--- (PASS|FAIL)' functional/log + +echo "===========================================================" +echo "Functional test summary" +echo "===========================================================" +echo "# TOTAL: $total" +echo "# PASS: $pass" +echo "# FAIL: $fail" +echo "" +echo "See functional/log for the detailed output." diff --git a/functional/user-data b/functional/user-data new file mode 100644 index 000000000..9aba6418e --- /dev/null +++ b/functional/user-data @@ -0,0 +1,12 @@ +#cloud-config + +--- +coreos: + etcd2: + advertise-client-urls: http://$private_ipv4:2379 + listen-client-urls: http://0.0.0.0:2379,http://0.0.0.0:4001 + units: + - name: etcd2.service + command: start + update: + reboot-strategy: off From 570993905f83fffe1fc5f83608706ff97a59a950 Mon Sep 17 00:00:00 2001 From: kayrus Date: Wed, 24 Feb 2016 16:14:31 +0100 Subject: [PATCH 02/26] travis: bump go minor versions, add 1.6 --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8c44610c8..0b3f8a413 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,12 @@ language: go matrix: include: - - go: 1.4.2 + - go: 1.4.3 install: - go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/vet - - go: 1.5.1 + - go: 1.5.3 + - go: 1.6 script: - ./test From 9400eebd19fc779abfd73409491fbd25fe6c0cf7 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Tue, 16 Feb 2016 16:01:05 +0100 Subject: [PATCH 03/26] fleetctl: avoid hard coding sleep time values inside calls Just make it a const var that can be used or adapted if needed later. --- fleetctl/destroy.go | 2 +- fleetctl/fleetctl.go | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fleetctl/destroy.go b/fleetctl/destroy.go index 0d319174d..54c6a57b8 100644 --- a/fleetctl/destroy.go +++ b/fleetctl/destroy.go @@ -71,7 +71,7 @@ func runDestroyUnits(args []string) (exit int) { if u == nil { break } - time.Sleep(500 * time.Millisecond) + time.Sleep(defaultSleepTime) } } diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 951536f20..71785606d 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -59,7 +59,8 @@ recommended to upgrade fleetctl to prevent incompatibility issues. clientDriverAPI = "API" clientDriverEtcd = "etcd" - defaultEndpoint = "unix:///var/run/fleet.sock" + defaultEndpoint = "unix:///var/run/fleet.sock" + defaultSleepTime = 500 * time.Millisecond ) var ( @@ -771,7 +772,7 @@ func waitForUnitStates(units []string, js job.JobState, maxAttempts int, out io. func checkUnitState(name string, js job.JobState, maxAttempts int, out io.Writer, wg *sync.WaitGroup, errchan chan error) { defer wg.Done() - sleep := 500 * time.Millisecond + sleep := defaultSleepTime if maxAttempts < 1 { for { From 4a7182672482be5f99b3c8bfe2840d9529f81db9 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 12:16:52 +0100 Subject: [PATCH 04/26] fleetctl:destroy: on destroy check if the unit does not exist Add IsErrorUnitNotFound() and use it in destroy to check if the error indicates that the unit does not exist. For the moment we just want to check if the error is 'unit not found', if later we want more we may export a more generic function into cAPI. --- client/http.go | 4 ++++ fleetctl/destroy.go | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/client/http.go b/client/http.go index 3714f356f..63027c843 100644 --- a/client/http.go +++ b/client/http.go @@ -137,3 +137,7 @@ func is404(err error) bool { googerr, ok := err.(*googleapi.Error) return ok && googerr.Code == http.StatusNotFound } + +func IsErrorUnitNotFound(err error) bool { + return is404(err) +} diff --git a/fleetctl/destroy.go b/fleetctl/destroy.go index 54c6a57b8..4197e6890 100644 --- a/fleetctl/destroy.go +++ b/fleetctl/destroy.go @@ -16,6 +16,8 @@ package main import ( "time" + + "github.com/coreos/fleet/client" ) var cmdDestroyUnit = &Command{ @@ -42,6 +44,10 @@ func runDestroyUnits(args []string) (exit int) { for _, v := range units { err := cAPI.DestroyUnit(v.Name) if err != nil { + // Ignore 'Unit does not exist' error + if client.IsErrorUnitNotFound(err) { + continue + } stderr("Error destroying units: %v", err) exit = 1 continue From d2fca872011de0f37f3e602b23446735c898efe4 Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Fri, 12 Feb 2016 15:57:56 +0100 Subject: [PATCH 05/26] tests fleetctl destroy behavior --- fleetctl/destroy_test.go | 97 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 fleetctl/destroy_test.go diff --git a/fleetctl/destroy_test.go b/fleetctl/destroy_test.go new file mode 100644 index 000000000..508047877 --- /dev/null +++ b/fleetctl/destroy_test.go @@ -0,0 +1,97 @@ +// Copyright 2014 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "testing" + + "github.com/coreos/fleet/client" + "github.com/coreos/fleet/job" + "github.com/coreos/fleet/machine" + "github.com/coreos/fleet/registry" + "github.com/coreos/fleet/unit" +) + +func newFakeRegistryForDestroy() client.API { + // clear machineStates for every invocation + machineStates = nil + machines := []machine.MachineState{ + newMachineState("c31e44e1-f858-436e-933e-59c642517860", "1.2.3.4", map[string]string{"ping": "pong"}), + newMachineState("595989bb-cbb7-49ce-8726-722d6e157b4e", "5.6.7.8", map[string]string{"foo": "bar"}), + } + + jobs := []job.Job{ + job.Job{Name: "j1.service", Unit: unit.UnitFile{}, TargetMachineID: machines[0].ID}, + job.Job{Name: "j2.service", Unit: unit.UnitFile{}, TargetMachineID: machines[1].ID}, + } + + states := []unit.UnitState{ + unit.UnitState{ + UnitName: "j1.service", + LoadState: "loaded", + ActiveState: "active", + SubState: "listening", + MachineID: machines[0].ID, + }, + unit.UnitState{ + UnitName: "j2.service", + LoadState: "loaded", + ActiveState: "inactive", + SubState: "dead", + MachineID: machines[1].ID, + }, + } + + reg := registry.NewFakeRegistry() + reg.SetMachines(machines) + reg.SetUnitStates(states) + reg.SetJobs(jobs) + + return &client.RegistryClient{Registry: reg} +} + +// TestRunDestroyUnits checks for correct unit destruction +func TestRunDestroyUnits(t *testing.T) { + for _, s := range []struct { + Description string + DestroyUnits []string + ExpectedExit int + }{ + { + "destroy available units", + []string{"j1", "j2"}, + 0, + }, + { + "attempt to destroy available and non-available units", + []string{"j1", "j2", "j3"}, + 0, + }, + } { + cAPI = newFakeRegistryForDestroy() + exit := runDestroyUnits(s.DestroyUnits) + if exit != s.ExpectedExit { + t.Errorf("%s: expected exit code %d but received %d", + s.Description, s.ExpectedExit, exit) + } + for _, destroyedUnit := range s.DestroyUnits { + u, _ := cAPI.Unit(destroyedUnit) + if u != nil { + t.Errorf("%s: unit %s was not destroyed as requested", + s.Description, destroyedUnit) + } + } + } +} From bf183459ae81b7a2ddd15bac68c4a6fd95ef4116 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 17 Feb 2016 16:29:59 +0100 Subject: [PATCH 06/26] destroy_test: add a destroy test for non-existent units --- fleetctl/destroy_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fleetctl/destroy_test.go b/fleetctl/destroy_test.go index 508047877..36410b4b7 100644 --- a/fleetctl/destroy_test.go +++ b/fleetctl/destroy_test.go @@ -74,6 +74,11 @@ func TestRunDestroyUnits(t *testing.T) { []string{"j1", "j2"}, 0, }, + { + "destroy non-existent units", + []string{"y1", "y2"}, + 0, + }, { "attempt to destroy available and non-available units", []string{"j1", "j2", "j3"}, From b04be7130fc3f7e895817f69881f4c75de2dc264 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Thu, 18 Feb 2016 15:20:08 +0100 Subject: [PATCH 07/26] fleetctl:test: add appendJobsForTests() helper to automatically append jobs --- fleetctl/fleetctl_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fleetctl/fleetctl_test.go b/fleetctl/fleetctl_test.go index ca635dab6..51ac7372e 100644 --- a/fleetctl/fleetctl_test.go +++ b/fleetctl/fleetctl_test.go @@ -15,9 +15,11 @@ package main import ( + "fmt" "testing" "github.com/coreos/fleet/client" + "github.com/coreos/fleet/job" "github.com/coreos/fleet/machine" "github.com/coreos/fleet/registry" "github.com/coreos/fleet/unit" @@ -26,6 +28,19 @@ import ( "github.com/coreos/fleet/Godeps/_workspace/src/github.com/coreos/go-semver/semver" ) +func appendJobsForTests(jobs *[]job.Job, machine machine.MachineState, prefix string, unitCnt int) { + for i := 1; i <= unitCnt; i++ { + j := job.Job{ + Name: fmt.Sprintf("%s%d.service", prefix, i), + Unit: unit.UnitFile{}, + TargetMachineID: machine.ID, + } + *jobs = append(*jobs, j) + } + + return +} + func newFakeRegistryForCheckVersion(v string) registry.ClusterRegistry { sv, err := semver.NewVersion(v) if err != nil { From 33bddce4e67beeef1d409d1a4f2fdfd35550a2fa Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Thu, 18 Feb 2016 15:21:30 +0100 Subject: [PATCH 08/26] fleetctl:destroy_test: make the test more smarter by checking for race conditions Make the test more smarter by checking for race conditions and output result. Sometimes you may see normal output sometimes no output between the two goroutines which is normal, the thing to worry about is if Destroy did success or not. --- fleetctl/destroy_test.go | 105 +++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/fleetctl/destroy_test.go b/fleetctl/destroy_test.go index 36410b4b7..a6d1b513e 100644 --- a/fleetctl/destroy_test.go +++ b/fleetctl/destroy_test.go @@ -15,7 +15,10 @@ package main import ( + "fmt" + "sync" "testing" + "time" "github.com/coreos/fleet/client" "github.com/coreos/fleet/job" @@ -24,7 +27,13 @@ import ( "github.com/coreos/fleet/unit" ) -func newFakeRegistryForDestroy() client.API { +type DestroyTestResults struct { + Description string + DestroyUnits []string + ExpectedExit int +} + +func newFakeRegistryForDestroy(prefix string, unitCnt int) client.API { // clear machineStates for every invocation machineStates = nil machines := []machine.MachineState{ @@ -32,26 +41,31 @@ func newFakeRegistryForDestroy() client.API { newMachineState("595989bb-cbb7-49ce-8726-722d6e157b4e", "5.6.7.8", map[string]string{"foo": "bar"}), } - jobs := []job.Job{ - job.Job{Name: "j1.service", Unit: unit.UnitFile{}, TargetMachineID: machines[0].ID}, - job.Job{Name: "j2.service", Unit: unit.UnitFile{}, TargetMachineID: machines[1].ID}, - } + jobs := make([]job.Job, 0) + appendJobsForTests(&jobs, machines[0], prefix, unitCnt) + appendJobsForTests(&jobs, machines[1], prefix, unitCnt) - states := []unit.UnitState{ - unit.UnitState{ - UnitName: "j1.service", + states := make([]unit.UnitState, 0) + for i := 1; i <= unitCnt; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", prefix, i), LoadState: "loaded", ActiveState: "active", SubState: "listening", MachineID: machines[0].ID, - }, - unit.UnitState{ - UnitName: "j2.service", + } + states = append(states, state) + } + + for i := 1; i <= unitCnt; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", prefix, i), LoadState: "loaded", ActiveState: "inactive", SubState: "dead", MachineID: machines[1].ID, - }, + } + states = append(states, state) } reg := registry.NewFakeRegistry() @@ -62,16 +76,26 @@ func newFakeRegistryForDestroy() client.API { return &client.RegistryClient{Registry: reg} } +func doDestroyUnits(r DestroyTestResults, errchan chan error) { + exit := runDestroyUnits(r.DestroyUnits) + if exit != r.ExpectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.Description, r.ExpectedExit, exit) + } + for _, destroyedUnit := range r.DestroyUnits { + u, _ := cAPI.Unit(destroyedUnit) + if u != nil { + errchan <- fmt.Errorf("%s: unit %s was not destroyed as requested", r.Description, destroyedUnit) + } + } +} + // TestRunDestroyUnits checks for correct unit destruction func TestRunDestroyUnits(t *testing.T) { - for _, s := range []struct { - Description string - DestroyUnits []string - ExpectedExit int - }{ + unitPrefix := "j" + results := []DestroyTestResults{ { "destroy available units", - []string{"j1", "j2"}, + []string{"j1", "j2", "j3", "j4", "j5"}, 0, }, { @@ -81,22 +105,39 @@ func TestRunDestroyUnits(t *testing.T) { }, { "attempt to destroy available and non-available units", - []string{"j1", "j2", "j3"}, + []string{"y1", "y2", "y3", "y4", "j1", "j2", "j3", "j4", "j5", "y0"}, 0, }, - } { - cAPI = newFakeRegistryForDestroy() - exit := runDestroyUnits(s.DestroyUnits) - if exit != s.ExpectedExit { - t.Errorf("%s: expected exit code %d but received %d", - s.Description, s.ExpectedExit, exit) - } - for _, destroyedUnit := range s.DestroyUnits { - u, _ := cAPI.Unit(destroyedUnit) - if u != nil { - t.Errorf("%s: unit %s was not destroyed as requested", - s.Description, destroyedUnit) - } + } + + // Check with two goroutines we don't care we should just get + // the right result. If you happen to inspect this code for + // errors then you probably got hit by a race condition in + // Destroy command that should not happen + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForDestroy(unitPrefix, len(r.DestroyUnits)) + + wg.Add(2) + go func() { + defer wg.Done() + time.Sleep(2 * time.Microsecond) + doDestroyUnits(r, errchan) + }() + go func() { + defer wg.Done() + doDestroyUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) } } } From d463b1c6fada7b7ebf9ae063501ecb1c53a190cc Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 16:37:29 +0100 Subject: [PATCH 09/26] fleetctl:test: add commandTestResults struct and newFakeRegistryForCommands() Add newFakeRegistryForCommands() function so fleetctl command tests can use it. We will add stop and unload tests which will also use this function and commandTestResults struct. --- fleetctl/destroy_test.go | 77 +++++---------------------------------- fleetctl/fleetctl_test.go | 53 ++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 69 deletions(-) diff --git a/fleetctl/destroy_test.go b/fleetctl/destroy_test.go index a6d1b513e..e0e68c95a 100644 --- a/fleetctl/destroy_test.go +++ b/fleetctl/destroy_test.go @@ -1,4 +1,4 @@ -// Copyright 2014 CoreOS, Inc. +// Copyright 2016 CoreOS, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,73 +18,17 @@ import ( "fmt" "sync" "testing" - "time" - - "github.com/coreos/fleet/client" - "github.com/coreos/fleet/job" - "github.com/coreos/fleet/machine" - "github.com/coreos/fleet/registry" - "github.com/coreos/fleet/unit" ) -type DestroyTestResults struct { - Description string - DestroyUnits []string - ExpectedExit int -} - -func newFakeRegistryForDestroy(prefix string, unitCnt int) client.API { - // clear machineStates for every invocation - machineStates = nil - machines := []machine.MachineState{ - newMachineState("c31e44e1-f858-436e-933e-59c642517860", "1.2.3.4", map[string]string{"ping": "pong"}), - newMachineState("595989bb-cbb7-49ce-8726-722d6e157b4e", "5.6.7.8", map[string]string{"foo": "bar"}), - } - - jobs := make([]job.Job, 0) - appendJobsForTests(&jobs, machines[0], prefix, unitCnt) - appendJobsForTests(&jobs, machines[1], prefix, unitCnt) - - states := make([]unit.UnitState, 0) - for i := 1; i <= unitCnt; i++ { - state := unit.UnitState{ - UnitName: fmt.Sprintf("%s%d.service", prefix, i), - LoadState: "loaded", - ActiveState: "active", - SubState: "listening", - MachineID: machines[0].ID, - } - states = append(states, state) - } - - for i := 1; i <= unitCnt; i++ { - state := unit.UnitState{ - UnitName: fmt.Sprintf("%s%d.service", prefix, i), - LoadState: "loaded", - ActiveState: "inactive", - SubState: "dead", - MachineID: machines[1].ID, - } - states = append(states, state) - } - - reg := registry.NewFakeRegistry() - reg.SetMachines(machines) - reg.SetUnitStates(states) - reg.SetJobs(jobs) - - return &client.RegistryClient{Registry: reg} -} - -func doDestroyUnits(r DestroyTestResults, errchan chan error) { - exit := runDestroyUnits(r.DestroyUnits) - if exit != r.ExpectedExit { - errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.Description, r.ExpectedExit, exit) +func doDestroyUnits(r commandTestResults, errchan chan error) { + exit := runDestroyUnits(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) } - for _, destroyedUnit := range r.DestroyUnits { + for _, destroyedUnit := range r.units { u, _ := cAPI.Unit(destroyedUnit) if u != nil { - errchan <- fmt.Errorf("%s: unit %s was not destroyed as requested", r.Description, destroyedUnit) + errchan <- fmt.Errorf("%s: unit %s was not destroyed as requested", r.description, destroyedUnit) } } } @@ -92,14 +36,14 @@ func doDestroyUnits(r DestroyTestResults, errchan chan error) { // TestRunDestroyUnits checks for correct unit destruction func TestRunDestroyUnits(t *testing.T) { unitPrefix := "j" - results := []DestroyTestResults{ + results := []commandTestResults{ { "destroy available units", []string{"j1", "j2", "j3", "j4", "j5"}, 0, }, { - "destroy non-existent units", + "destroy non-available units", []string{"y1", "y2"}, 0, }, @@ -118,12 +62,11 @@ func TestRunDestroyUnits(t *testing.T) { var wg sync.WaitGroup errchan := make(chan error) - cAPI = newFakeRegistryForDestroy(unitPrefix, len(r.DestroyUnits)) + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) wg.Add(2) go func() { defer wg.Done() - time.Sleep(2 * time.Microsecond) doDestroyUnits(r, errchan) }() go func() { diff --git a/fleetctl/fleetctl_test.go b/fleetctl/fleetctl_test.go index 51ac7372e..0d50de40c 100644 --- a/fleetctl/fleetctl_test.go +++ b/fleetctl/fleetctl_test.go @@ -28,8 +28,57 @@ import ( "github.com/coreos/fleet/Godeps/_workspace/src/github.com/coreos/go-semver/semver" ) -func appendJobsForTests(jobs *[]job.Job, machine machine.MachineState, prefix string, unitCnt int) { - for i := 1; i <= unitCnt; i++ { +type commandTestResults struct { + description string + units []string + expectedExit int +} + +func newFakeRegistryForCommands(unitPrefix string, unitCount int) client.API { + // clear machineStates for every invocation + machineStates = nil + machines := []machine.MachineState{ + newMachineState("c31e44e1-f858-436e-933e-59c642517860", "1.2.3.4", map[string]string{"ping": "pong"}), + newMachineState("595989bb-cbb7-49ce-8726-722d6e157b4e", "5.6.7.8", map[string]string{"foo": "bar"}), + } + + jobs := make([]job.Job, 0) + appendJobsForTests(&jobs, machines[0], unitPrefix, unitCount) + appendJobsForTests(&jobs, machines[1], unitPrefix, unitCount) + + states := make([]unit.UnitState, 0) + for i := 1; i <= unitCount; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", unitPrefix, i), + LoadState: "loaded", + ActiveState: "active", + SubState: "listening", + MachineID: machines[0].ID, + } + states = append(states, state) + } + + for i := 1; i <= unitCount; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", unitPrefix, i), + LoadState: "loaded", + ActiveState: "inactive", + SubState: "dead", + MachineID: machines[1].ID, + } + states = append(states, state) + } + + reg := registry.NewFakeRegistry() + reg.SetMachines(machines) + reg.SetUnitStates(states) + reg.SetJobs(jobs) + + return &client.RegistryClient{Registry: reg} +} + +func appendJobsForTests(jobs *[]job.Job, machine machine.MachineState, prefix string, unitCount int) { + for i := 1; i <= unitCount; i++ { j := job.Job{ Name: fmt.Sprintf("%s%d.service", prefix, i), Unit: unit.UnitFile{}, From be1126f9799f0cd03ebcdb5a3693932da28a9caa Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 16:39:07 +0100 Subject: [PATCH 10/26] fleetctl:test: add some tests for fleetctl stop path --- fleetctl/stop_test.go | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 fleetctl/stop_test.go diff --git a/fleetctl/stop_test.go b/fleetctl/stop_test.go new file mode 100644 index 000000000..81bae97d2 --- /dev/null +++ b/fleetctl/stop_test.go @@ -0,0 +1,92 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "sync" + "testing" + + "github.com/coreos/fleet/job" +) + +func doStopUnits(r commandTestResults, errchan chan error) { + sharedFlags.NoBlock = true + + exit := runStopUnit(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) + } + + real_units, err := findUnits(r.units) + if err != nil { + errchan <- err + return + } + + // We assume that we reached the desired state + for _, v := range real_units { + if job.JobState(v.DesiredState) != job.JobStateLoaded { + errchan <- fmt.Errorf("Error: unit %s was not stopped as requested", v.Name) + } + } +} + +func TestRunStopUnits(t *testing.T) { + unitPrefix := "stop" + results := []commandTestResults{ + { + "stop available units", + []string{"stop1", "stop2", "stop3", "stop4", "stop5"}, + 0, + }, + { + "stop non-available units", + []string{"y1", "y2"}, + 0, + }, + { + "attempt to stop available and non-available units", + []string{"y1", "y2", "y3", "y4", "stop1", "stop2", "stop3", "stop4", "stop5", "y0"}, + 0, + }, + } + + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) + + wg.Add(2) + go func() { + defer wg.Done() + doStopUnits(r, errchan) + }() + go func() { + defer wg.Done() + doStopUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) + } + } +} From 063b97489d9a67128309d72be1cccacbcc2eed99 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 16:41:18 +0100 Subject: [PATCH 11/26] fleetctl:test: add some tests for fleetctl unload path --- fleetctl/unload_test.go | 92 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 fleetctl/unload_test.go diff --git a/fleetctl/unload_test.go b/fleetctl/unload_test.go new file mode 100644 index 000000000..ab1015bb7 --- /dev/null +++ b/fleetctl/unload_test.go @@ -0,0 +1,92 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "sync" + "testing" + + "github.com/coreos/fleet/job" +) + +func doUnloadUnits(r commandTestResults, errchan chan error) { + sharedFlags.NoBlock = true + + exit := runUnloadUnit(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) + } + + real_units, err := findUnits(r.units) + if err != nil { + errchan <- err + return + } + + // We assume that we reached the desired state + for _, v := range real_units { + if job.JobState(v.DesiredState) != job.JobStateInactive { + errchan <- fmt.Errorf("Error: unit %s was not unloaded as requested", v.Name) + } + } +} + +func TestRunUnloadUnits(t *testing.T) { + unitPrefix := "unload" + results := []commandTestResults{ + { + "unload available units", + []string{"unload1", "unload2", "unload3", "unload4", "unload5"}, + 0, + }, + { + "unload non-available units", + []string{"y1", "y2"}, + 0, + }, + { + "attempt to unload available and non-available units", + []string{"y1", "y2", "y3", "y4", "unload1", "unload2", "unload3", "unload4", "unload5", "y0"}, + 0, + }, + } + + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) + + wg.Add(2) + go func() { + defer wg.Done() + doUnloadUnits(r, errchan) + }() + go func() { + defer wg.Done() + doUnloadUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) + } + } +} From 50c78c693da013dfad5bafd6b96fc43f433f9f81 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 16:24:22 +0100 Subject: [PATCH 12/26] fleetctl:test: restore back sharedFlags.NoBlock when finishing --- fleetctl/stop_test.go | 8 ++++++-- fleetctl/unload_test.go | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fleetctl/stop_test.go b/fleetctl/stop_test.go index 81bae97d2..950337870 100644 --- a/fleetctl/stop_test.go +++ b/fleetctl/stop_test.go @@ -23,8 +23,6 @@ import ( ) func doStopUnits(r commandTestResults, errchan chan error) { - sharedFlags.NoBlock = true - exit := runStopUnit(r.units) if exit != r.expectedExit { errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) @@ -46,6 +44,11 @@ func doStopUnits(r commandTestResults, errchan chan error) { func TestRunStopUnits(t *testing.T) { unitPrefix := "stop" + oldNoBlock := sharedFlags.NoBlock + defer func() { + sharedFlags.NoBlock = oldNoBlock + }() + results := []commandTestResults{ { "stop available units", @@ -64,6 +67,7 @@ func TestRunStopUnits(t *testing.T) { }, } + sharedFlags.NoBlock = true for _, r := range results { var wg sync.WaitGroup errchan := make(chan error) diff --git a/fleetctl/unload_test.go b/fleetctl/unload_test.go index ab1015bb7..ca3fd6c5f 100644 --- a/fleetctl/unload_test.go +++ b/fleetctl/unload_test.go @@ -23,8 +23,6 @@ import ( ) func doUnloadUnits(r commandTestResults, errchan chan error) { - sharedFlags.NoBlock = true - exit := runUnloadUnit(r.units) if exit != r.expectedExit { errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) @@ -46,6 +44,11 @@ func doUnloadUnits(r commandTestResults, errchan chan error) { func TestRunUnloadUnits(t *testing.T) { unitPrefix := "unload" + oldNoBlock := sharedFlags.NoBlock + defer func() { + sharedFlags.NoBlock = oldNoBlock + }() + results := []commandTestResults{ { "unload available units", @@ -64,6 +67,7 @@ func TestRunUnloadUnits(t *testing.T) { }, } + sharedFlags.NoBlock = true for _, r := range results { var wg sync.WaitGroup errchan := make(chan error) From e50ee54bf7acafd2ee9032be3178f5c4e05f8403 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 24 Feb 2016 16:48:14 +0100 Subject: [PATCH 13/26] fleetctl:test: improve operation description for unload and stop tests --- fleetctl/stop_test.go | 2 +- fleetctl/unload_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fleetctl/stop_test.go b/fleetctl/stop_test.go index 950337870..cc6448c15 100644 --- a/fleetctl/stop_test.go +++ b/fleetctl/stop_test.go @@ -61,7 +61,7 @@ func TestRunStopUnits(t *testing.T) { 0, }, { - "attempt to stop available and non-available units", + "stop available and non-available units", []string{"y1", "y2", "y3", "y4", "stop1", "stop2", "stop3", "stop4", "stop5", "y0"}, 0, }, diff --git a/fleetctl/unload_test.go b/fleetctl/unload_test.go index ca3fd6c5f..84ff41676 100644 --- a/fleetctl/unload_test.go +++ b/fleetctl/unload_test.go @@ -61,7 +61,7 @@ func TestRunUnloadUnits(t *testing.T) { 0, }, { - "attempt to unload available and non-available units", + "unload available and non-available units", []string{"y1", "y2", "y3", "y4", "unload1", "unload2", "unload3", "unload4", "unload5", "y0"}, 0, }, From aae37e178650ac1eb22fe656931e07fd8787d33d Mon Sep 17 00:00:00 2001 From: kayrus Date: Fri, 5 Feb 2016 17:43:08 +0100 Subject: [PATCH 14/26] docs: remove D-Bus and polkit note and added new fleetd CLI parameters. This wiki page is outdated http://www.freedesktop.org/wiki/Software/systemd/dbus/ Polkit rule was already implemented in this PR: https://github.com/coreos/coreos-overlay/pull/1579 --- Documentation/architecture.md | 20 ++- Documentation/deployment-and-configuration.md | 134 ++++++++++++++---- 2 files changed, 120 insertions(+), 34 deletions(-) diff --git a/Documentation/architecture.md b/Documentation/architecture.md index 6f43e7f55..80464882f 100644 --- a/Documentation/architecture.md +++ b/Documentation/architecture.md @@ -50,18 +50,28 @@ A UnitState object represents the state of a Unit in the fleet engine. A UnitSta ## Preview Release -Current releases of fleet don't currently perform any authentication or authorization for submitted units. This means that any client that can access your etcd cluster can potentially run arbitrary code on many of your machines very easily. +Current releases of fleet don't currently perform any authentication or authorization for submitted units. This means that any client that can access your etcd cluster can potentially run arbitrary code on many of your machines very easily, thus it is strongly recommended to enable [TLS authentication][etcd-security] on the etcd side, set proper file permissions to the keypair on the host and [configure fleet][fleet-tls] to use keypair. ## Securing etcd You should avoid public access to etcd and instead run fleet [from your local laptop][using-the-client] with the `--tunnel` flag to run commands over an SSH tunnel. You can alias this flag for easier usage: `alias fleetctl=fleetctl --tunnel 10.10.10.10` - or use the environment variable `FLEETCTL_TUNNEL`. -## Other Notes +## Securing fleetd -Since it interacts directly with systemd over D-Bus, the fleetd daemon must be run with elevated privileges (i.e. as root) in order to perform operations like starting and stopping services. From the [systemd D-Bus documentation][systemd-dbus]: +It is also recommended to run fleetd under separate `fleet` user and group, and set the permissions of the fleetd API's listening Unix socket to `0660`. This will require local user to be in `fleet` group to perform an action with fleetd. Since the fleet daemon uses [D-Bus][d-bus] to communicate with systemd it is necessary to create a [`polkit(8)`][polkit] rule to allow fleetd to communicate with systemd: -> In contrast to most of the other services of the systemd suite PID 1 does not use PolicyKit for controlling access to privileged operations, but relies exclusively on the low-level D-Bus policy language. (This is done in order to avoid a cyclic dependency between PolicyKit and systemd/PID 1.) This means that sensitive operations exposed by PID 1 on the bus are generally not available to unprivileged processes directly. +```js +polkit.addRule(function(action, subject) { + if (action.id.indexOf("org.freedesktop.systemd1.") == 0 && + subject.user == "fleet") { + return polkit.Result.YES; + } +}); +``` +[etcd-security]: https://github.com/coreos/etcd/blob/master/Documentation/security.md +[d-bus]: https://www.freedesktop.org/wiki/Software/dbus/ +[fleet-tls]: deployment-and-configuration.md#tls-authentication +[polkit]: https://www.freedesktop.org/software/polkit/docs/latest/polkit.8.html [states documentation]: states.md [using-the-client]: using-the-client.md#get-up-and-running -[systemd-dbus]: http://www.freedesktop.org/wiki/Software/systemd/dbus/ diff --git a/Documentation/deployment-and-configuration.md b/Documentation/deployment-and-configuration.md index e0f26803d..ecffa3405 100644 --- a/Documentation/deployment-and-configuration.md +++ b/Documentation/deployment-and-configuration.md @@ -8,7 +8,54 @@ Deploying `fleet` on CoreOS is even simpler: just run `systemctl start fleet`. T Each `fleetd` daemon must be configured to talk to the same [etcd cluster][etcd]. By default, the `fleetd` daemon will connect to either http://127.0.0.1:2379 or http://127.0.0.1:4001, depending on which endpoint responds. Refer to the configuration documentation below for customization help. -`fleet` requires etcd be of version 0.3.0+. +`fleet` requires etcd be of version 0.3.0+ but it is recommended to use etcd 2.0.0+ which supports [TLS authentication][etcd-security]. + +### TLS Authentication + +If your etcd cluster has [TLS authentication][etcd-security] enabled, you will need to configure fleet to use an appropriate TLS keypair. The examples below show how to achieve this: + +#### Using systemd Drop-Ins + +```ini +[Service] +Environment="FLEET_ETCD_CAFILE=/etc/ssl/etcd/ca.pem" +Environment="FLEET_ETCD_CERTFILE=/etc/ssl/etcd/client.pem" +Environment="FLEET_ETCD_KEYFILE=/etc/ssl/etcd/client-key.pem" +Environment="FLEET_ETCD_SERVERS=https://172.16.0.101:2379,https://172.16.0.102:2379,https://172.16.0.103:2379" +Environment="FLEET_METADATA=hostname=server1" +Environment="FLEET_PUBLIC_IP=172.16.0.101" +``` + +#### Using CLI paramenters + +```sh +fleetd --etcd-cafile /etc/ssl/etcd/ca.pem \ + --etcd-keyfile /etc/ssl/etcd/client-key.pem \ + --etcd-certfile /etc/ssl/etcd/client.pem \ + --etcd-servers https://192.0.2.12:2379 +``` + +#### Using CoreOS Cloud Config + +```yaml +#cloud-config + +coreos: + fleet: + etcd_servers: "https://192.0.2.12:2379" + etcd_cafile: /etc/ssl/etcd/ca.pem + etcd_certfile: /etc/ssl/etcd/client.pem + etcd_keyfile: /etc/ssl/etcd/client-key.pem +``` + +#### Using fleet configuration file + +```ini +etcd_servers=["https://192.0.2.12:2379"] +etcd_cafile=/etc/ssl/etcd/ca.pem +etcd_certfile=/etc/ssl/etcd/client.pem +etcd_keyfile=/etc/ssl/etcd/client-key.pem +``` ## systemd @@ -20,15 +67,15 @@ The `fleetctl` client tool uses SSH to interact with a fleet cluster. This means Authorizing a public SSH key is typically as easy as appending it to the user's `~/.ssh/authorized_keys` file. This may not be true on your systemd, though. If running CoreOS, use the built-in `update-ssh-keys` utility - it helps manage multiple authorized keys. -To make things incredibly easy, included in the [fleet source][fleetctl-inject-ssh] is a script that will distribute SSH keys across a fleet cluster running on CoreOS. Simply pipe the contents of a public SSH key into the script: +To make things incredibly easy, included in the [fleet source][fleet-inject-ssh] is a script that will distribute SSH keys across a fleet cluster running on CoreOS. Simply pipe the contents of a public SSH key into the script: -``` +```sh cat ~/.ssh/id_rsa.pub | ./fleetctl-inject-ssh.sh simon ``` All but the first argument to `fleetctl-inject-ssh.sh` are passed directly to `fleetctl`. -``` +```sh cat ~/.ssh/id_rsa.pub | ./fleetctl-inject-ssh.sh simon --tunnel 19.12.0.33 ``` @@ -40,14 +87,14 @@ The configuration of these interfaces is managed through a [systemd socket unit] CoreOS ships a socket unit for fleet (`fleet.socket`) which binds to a Unix domain socket, `/var/run/fleet.sock`. Unix socket is accessible using tool such as curl (v7.40 or greater): `curl --unix-socket /var/run/fleet.sock http:/fleet/v1/units`. To serve the fleet API over a network address, simply extend or replace this socket unit. -For example, writing the following [drop-in] to `/etc/systemd/system/fleet.socket.d/30-ListenStream.conf` would enable fleet to be reached over the local port `49153` in addition to `/var/run/fleet.sock`: +For example, writing the following [drop-in][drop-in] to `/etc/systemd/system/fleet.socket.d/30-ListenStream.conf` would enable fleet to be reached over the local port `49153` in addition to `/var/run/fleet.sock`: -``` +```ini [Socket] ListenStream=127.0.0.1:49153 ``` -After you've written the file, call `systemctl daemon-reload` to load the new [drop-in], followed by `systemctl stop fleet.service; systemctl restart fleet.socket; systemctl start fleet.service`. +After you've written the file, call `systemctl daemon-reload` to load the new [drop-in][drop-in], followed by `systemctl stop fleet.service; systemctl restart fleet.socket; systemctl start fleet.service`. Once the socket is running, the fleet API will be available at `http://${ListenStream}/fleet/v1`, where `${ListenStream}` is the value of the `ListenStream` option used in your socket file. This endpoint is accessible directly using tools such as curl and wget, or you can use fleetctl like so: `fleetctl --endpoint http://${ListenStream} `. @@ -65,93 +112,122 @@ The `fleetd` daemon uses two sources for configuration parameters: fleet will look at `/etc/fleet/fleet.conf` for this config file by default. The `--config` flag may be passed to the `fleetd` binary to use a custom config file location. The options that may be set are defined below. Note that each of the options should be defined at the global level, outside of any INI sections. -Environment variables may also provide configuration options. Options provided in an environment variable will override the corresponding option provided in a config file. To use an environment variable, simply prefix the name of a given option with `FLEET_`, while uppercasing the rest of the name. For example, to set the `etcd_servers` option to 'http://192.0.2.12:2379' when running the fleetd binary: +Environment variables may also provide configuration options. Options provided in an environment variable will override the corresponding option provided in a config file. To use an environment variable, simply prefix the name of a given option with `FLEET_`, while uppercasing the rest of the name. For example, to set the `--etcd-servers` option to 'http://192.0.2.12:2379' when running the fleetd binary: -``` +```sh $ FLEET_ETCD_SERVERS=http://192.0.2.12:2379 /usr/bin/fleetd ``` ## General Options -#### verbosity +#### --verbosity Enable debug logging by setting this to an integer value greater than zero. Only a single debug level exists, so all values greater than zero are considered equivalent. Default: 0 -#### etcd_servers +#### --etcd-servers Provide a custom set of etcd endpoints. Default: "http://127.0.0.1:2379,http://127.0.0.1:4001" -#### etcd_request_timeout +#### --etcd-request-timeout Amount of time in seconds to allow a single etcd request before considering it failed. Default: 1.0 -#### etcd_cafile, etcd_keyfile, etcd_certfile +#### --etcd-cafile, --etcd-keyfile, --etcd-certfile Provide TLS configuration when SSL certificate authentication is enabled in etcd endpoints Default: "" -#### etcd_key_prefix +#### --etcd-key-prefix Keyspace path for fleet data in etcd. Default: "/_coreos.com/fleet/" -#### public_ip +#### --public-ip IP address that should be published with the local Machine's state and any socket information. If not set, fleetd will attempt to detect the IP it should publish based on the machine's IP routing information. Default: "" -#### metadata +#### --metadata Comma-delimited key/value pairs that are published with the local to the fleet registry. This data can be used directly by a client of fleet to make scheduling decisions. An example set of metadata could look like: - metadata="region=us-west,az=us-west-1" - metadata='region=us-west,az=us-west-1' - metadata=region=us-west,az=us-west-1 +```ini +metadata="region=us-west,az=us-west-1" +metadata='region=us-west,az=us-west-1' +metadata=region=us-west,az=us-west-1 +``` The value of the metadata option should conform to one of these three forms: - - metadata="STRING" - metadata='STRING' - metadata=STRING + +```ini +metadata="STRING" +metadata='STRING' +metadata=STRING +``` ...while STRING is one of: - yyy[,yyy[,yyy...]] +```ini +yyy[,yyy[,yyy...]] +``` ...and yyy is one of: - key=value +```ini +key=value +``` Space and tab characters will be stripped around the equals sign and around each comma. If the same key is defined more than once, the last value overwrites the previous value(s). Default: "" -#### agent_ttl +#### --agent-ttl An Agent will be considered dead if it exceeds this amount of time to communicate with the Registry. The agent will attempt a heartbeat at half of this value. Default: "30s" -#### engine_reconcile_interval +#### --engine-reconcile-interval Interval in seconds at which the engine should reconcile the cluster schedule in etcd. Default: 2 -[etcd]: https://github.com/coreos/docs/blob/master/etcd/getting-started-with-etcd.md +#### --token-limit + +Maximum number of entries per page returned from API requests. + +Default: "100" + +### --disable-engine + +Disable the engine entirely, use with care. You can find more info about this option in [fleet scaling doc][fleet-scale]. + +Default: false + +### --disable-watches + +Disable the use of etcd watches. Increases scheduling latency. You can find more info about this option in [fleet scaling doc][fleet-scale]. + +Default: false + [api-doc]: api-v1.md -[fleetctl-inject-ssh]: /scripts/fleetctl-inject-ssh.sh +[config]: /fleet.conf.sample +[etcd]: https://github.com/coreos/docs/blob/master/etcd/getting-started-with-etcd.md +[etcd-security]: https://github.com/coreos/etcd/blob/master/Documentation/security.md +[fleet-inject-ssh]: /scripts/fleetctl-inject-ssh.sh +[fleet-scale]: fleet-scaling.md#implemented-quick-wins [socket-unit]: http://www.freedesktop.org/software/systemd/man/systemd.socket.html [config]: /fleet.conf.sample [drop-in]: https://github.com/coreos/docs/blob/master/os/using-systemd-drop-in-units.md From 87a51d3f052b64f1e2cf81b370d1f20d42bf749d Mon Sep 17 00:00:00 2001 From: kayrus Date: Tue, 1 Mar 2016 18:06:08 +0100 Subject: [PATCH 15/26] docs: purged CLI parameters info (was added by mistake) --- Documentation/architecture.md | 2 +- Documentation/deployment-and-configuration.md | 35 +++++++------------ Documentation/fleet-scaling.md | 6 ++-- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/Documentation/architecture.md b/Documentation/architecture.md index 80464882f..af93a4874 100644 --- a/Documentation/architecture.md +++ b/Documentation/architecture.md @@ -11,7 +11,7 @@ Every system in the fleet cluster runs a single `fleetd` daemon. Each daemon enc - The engine uses a _lease model_ to enforce that only one engine is running at a time. Every time a reconciliation is due, an engine will attempt to take a lease on etcd. If the lease succeeds, the reconciliation proceeds; otherwise, that engine will remain idle until the next reconciliation period begins. - The engine uses a simplistic "least-loaded" scheduling algorithm: when considering where to schedule a given unit, preference is given to agents running the smallest number of units. -The reconciliation loop of the engine can be disabled with the `--disable-engine` flag. This means that +The reconciliation loop of the engine can be disabled with the `disable_engine` config flag. This means that this `fleetd` daemon will *never* become a cluster leader. If all running daemons have this setting, your cluster is dead; i.e. no jobs will be scheduled. Use with care. diff --git a/Documentation/deployment-and-configuration.md b/Documentation/deployment-and-configuration.md index ecffa3405..f579b0c96 100644 --- a/Documentation/deployment-and-configuration.md +++ b/Documentation/deployment-and-configuration.md @@ -26,15 +26,6 @@ Environment="FLEET_METADATA=hostname=server1" Environment="FLEET_PUBLIC_IP=172.16.0.101" ``` -#### Using CLI paramenters - -```sh -fleetd --etcd-cafile /etc/ssl/etcd/ca.pem \ - --etcd-keyfile /etc/ssl/etcd/client-key.pem \ - --etcd-certfile /etc/ssl/etcd/client.pem \ - --etcd-servers https://192.0.2.12:2379 -``` - #### Using CoreOS Cloud Config ```yaml @@ -112,7 +103,7 @@ The `fleetd` daemon uses two sources for configuration parameters: fleet will look at `/etc/fleet/fleet.conf` for this config file by default. The `--config` flag may be passed to the `fleetd` binary to use a custom config file location. The options that may be set are defined below. Note that each of the options should be defined at the global level, outside of any INI sections. -Environment variables may also provide configuration options. Options provided in an environment variable will override the corresponding option provided in a config file. To use an environment variable, simply prefix the name of a given option with `FLEET_`, while uppercasing the rest of the name. For example, to set the `--etcd-servers` option to 'http://192.0.2.12:2379' when running the fleetd binary: +Environment variables may also provide configuration options. Options provided in an environment variable will override the corresponding option provided in a config file. To use an environment variable, simply prefix the name of a given option with `FLEET_`, while uppercasing the rest of the name. For example, to set the `etcd_servers` option to 'http://192.0.2.12:2379' when running the fleetd binary: ```sh $ FLEET_ETCD_SERVERS=http://192.0.2.12:2379 /usr/bin/fleetd @@ -120,45 +111,45 @@ $ FLEET_ETCD_SERVERS=http://192.0.2.12:2379 /usr/bin/fleetd ## General Options -#### --verbosity +#### verbosity Enable debug logging by setting this to an integer value greater than zero. Only a single debug level exists, so all values greater than zero are considered equivalent. Default: 0 -#### --etcd-servers +#### etcd_servers Provide a custom set of etcd endpoints. Default: "http://127.0.0.1:2379,http://127.0.0.1:4001" -#### --etcd-request-timeout +#### etcd_request_timeout Amount of time in seconds to allow a single etcd request before considering it failed. Default: 1.0 -#### --etcd-cafile, --etcd-keyfile, --etcd-certfile +#### etcd_cafile, etcd_keyfile, etcd_certfile Provide TLS configuration when SSL certificate authentication is enabled in etcd endpoints Default: "" -#### --etcd-key-prefix +#### etcd_key_prefix Keyspace path for fleet data in etcd. Default: "/_coreos.com/fleet/" -#### --public-ip +#### public_ip IP address that should be published with the local Machine's state and any socket information. If not set, fleetd will attempt to detect the IP it should publish based on the machine's IP routing information. Default: "" -#### --metadata +#### metadata Comma-delimited key/value pairs that are published with the local to the fleet registry. This data can be used directly by a client of fleet to make scheduling decisions. An example set of metadata could look like: @@ -192,31 +183,31 @@ Space and tab characters will be stripped around the equals sign and around each Default: "" -#### --agent-ttl +#### agent_ttl An Agent will be considered dead if it exceeds this amount of time to communicate with the Registry. The agent will attempt a heartbeat at half of this value. Default: "30s" -#### --engine-reconcile-interval +#### engine_reconcile_interval Interval in seconds at which the engine should reconcile the cluster schedule in etcd. Default: 2 -#### --token-limit +#### token_limit Maximum number of entries per page returned from API requests. Default: "100" -### --disable-engine +### disable_engine Disable the engine entirely, use with care. You can find more info about this option in [fleet scaling doc][fleet-scale]. Default: false -### --disable-watches +### disable_watches Disable the use of etcd watches. Increases scheduling latency. You can find more info about this option in [fleet scaling doc][fleet-scale]. diff --git a/Documentation/fleet-scaling.md b/Documentation/fleet-scaling.md index 0a7554a76..a501bbc31 100644 --- a/Documentation/fleet-scaling.md +++ b/Documentation/fleet-scaling.md @@ -39,13 +39,13 @@ RPCs between the engine and agent. this is an expensive operation. The fewer nodes that are engaged in this election, the better. Possible downside is that if there isn't a leader at all, the cluster is inoperable. However the (usually) 5 machines running - etcd are also a single point of failure. *See the `--disable-engine` flag.* + etcd are also a single point of failure. *See the `disable_engine` config flag.* * Making some defaults exported and allow them to be overridden. For instance fleet's tokenLimit controls how many Units are listed per "page". *See the - `--token-limit` flag.* + `token_limit` config flag.* * Removing watches from fleet: By removing the watches from fleet we stop the entire cluster from walking up whenever a new job is to be scheduled. The downside of this change is that fleet's responsiveness is lower. - *See the `--disable-watches` flag.* + *See the `disable_watches` config flag.* From fecff7f8792ff743aaa7f6a419bbaf85c088ee6d Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 17 Feb 2016 12:16:17 +0100 Subject: [PATCH 16/26] fleetctl: add tryWaitForUnitStates() and getBlockAttempts() * tryWaitForUnitStates() tries to wait for units to reach the desired state. * getBlockAttempts() gets the correct value of how many attempts to try before giving up on an operation. These helpers will be used to make the code more consistent and clean. We do not intended to change any behaviour here. --- fleetctl/fleetctl.go | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 71785606d..09a75b6aa 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -746,6 +746,56 @@ func setTargetStateOfUnits(units []string, state job.JobState) ([]*schema.Unit, return triggered, nil } +// getBlockAttempts gets the correct value of how many attempts to try +// before giving up on an operation. +// It returns a negative value which means try forever, if zero is +// returned then do not make any attempt, and if a positive value is +// returned then try up to that value +func getBlockAttempts() int { + // By default we wait forever + var attempts int = -1 + + // Up to BlockAttempts + if sharedFlags.BlockAttempts > 0 { + attempts = sharedFlags.BlockAttempts + } + + // NoBlock we do not wait + if sharedFlags.NoBlock { + attempts = 0 + } + + return attempts +} + +// tryWaitForUnitStates tries to wait for units to reach the desired state. +// It takes 5 arguments, the units to wait for, the desired state, the +// desired JobState, how many attempts before timing out and a writer +// interface. +// tryWaitForUnitStates polls each of the indicated units until they +// reach the desired state. If maxAttempts is zero, then it will not +// wait, it will assume that all units reached their desired state. +// If maxAttempts is negative tryWaitForUnitStates will retry forever, and +// if it is greater than zero, it will retry up to the indicated value. +// It returns 0 on success or 1 on errors. +func tryWaitForUnitStates(units []string, state string, js job.JobState, maxAttempts int, out io.Writer) (ret int) { + // We do not wait just assume we reached the desired state + if maxAttempts == 0 { + for _, name := range units { + stdout("Triggered unit %s %s", name, state) + } + return + } + + errchan := waitForUnitStates(units, js, maxAttempts, out) + for err := range errchan { + stderr("Error waiting for units: %v", err) + ret = 1 + } + + return +} + // waitForUnitStates polls each of the indicated units until each of their // states is equal to that which the caller indicates, or until the // polling operation times out. waitForUnitStates will retry forever, or From f47f906a961a0efa36a3968fcfafffe1af61a345 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:35:56 +0100 Subject: [PATCH 17/26] fleetctl: {load|start|stop|unload} use the tryWaitForUnitStates() and getBlockAttempts() --- fleetctl/load.go | 14 +++----------- fleetctl/start.go | 14 +++----------- fleetctl/stop.go | 14 +++----------- fleetctl/unload.go | 14 +++----------- 4 files changed, 12 insertions(+), 44 deletions(-) diff --git a/fleetctl/load.go b/fleetctl/load.go index 497f86058..8659a1498 100644 --- a/fleetctl/load.go +++ b/fleetctl/load.go @@ -46,6 +46,8 @@ func init() { } func runLoadUnits(args []string) (exit int) { + attempts := getBlockAttempts() + if err := lazyCreateUnits(args); err != nil { stderr("Error creating units: %v", err) return 1 @@ -66,17 +68,7 @@ func runLoadUnits(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(loading, job.JobStateLoaded, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range loading { - stdout("Triggered unit %s load", name) - } - } + exit = tryWaitForUnitStates(loading, "load", job.JobStateLoaded, attempts, os.Stdout) return } diff --git a/fleetctl/start.go b/fleetctl/start.go index 77ea0d343..17c8a53da 100644 --- a/fleetctl/start.go +++ b/fleetctl/start.go @@ -54,6 +54,8 @@ func init() { } func runStartUnit(args []string) (exit int) { + attempts := getBlockAttempts() + if err := lazyCreateUnits(args); err != nil { stderr("Error creating units: %v", err) return 1 @@ -74,17 +76,7 @@ func runStartUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(starting, job.JobStateLaunched, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range starting { - stdout("Triggered unit %s start", name) - } - } + exit = tryWaitForUnitStates(starting, "start", job.JobStateLaunched, attempts, os.Stdout) return } diff --git a/fleetctl/stop.go b/fleetctl/stop.go index 262946bfb..c63904107 100644 --- a/fleetctl/stop.go +++ b/fleetctl/stop.go @@ -52,6 +52,8 @@ func init() { } func runStopUnit(args []string) (exit int) { + attempts := getBlockAttempts() + units, err := findUnits(args) if err != nil { stderr("%v", err) @@ -79,17 +81,7 @@ func runStopUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(stopping, job.JobStateLoaded, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range stopping { - stdout("Triggered unit %s stop", name) - } - } + exit = tryWaitForUnitStates(stopping, "stop", job.JobStateLoaded, attempts, os.Stdout) return } diff --git a/fleetctl/unload.go b/fleetctl/unload.go index 6758f2825..c9700496a 100644 --- a/fleetctl/unload.go +++ b/fleetctl/unload.go @@ -36,6 +36,8 @@ func init() { } func runUnloadUnit(args []string) (exit int) { + attempts := getBlockAttempts() + units, err := findUnits(args) if err != nil { stderr("%v", err) @@ -60,17 +62,7 @@ func runUnloadUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(wait, job.JobStateInactive, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range wait { - stdout("Triggered unit %s unload", name) - } - } + exit = tryWaitForUnitStates(wait, "unload", job.JobStateInactive, attempts, os.Stdout) return } From ed4569ca764088e06add12a6cdd580738bc48766 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:42:48 +0100 Subject: [PATCH 18/26] fleetctl: move logic to lookup a unit into getUnitFile() and getUnitFileFromTemplate() --- fleetctl/fleetctl.go | 119 ++++++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 41 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 09a75b6aa..3b99f7f2a 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -483,6 +483,36 @@ func getChecker() *ssh.HostKeyChecker { return ssh.NewHostKeyChecker(keyFile) } +func getUnitFile(file string) (*unit.UnitFile, error) { + var uf *unit.UnitFile + name := unitNameMangle(file) + + log.Debugf("Looking up for Unit(%s) or its corresponding template", name) + + // Assume that the file references a local unit file on disk and + // attempt to load it, if it exists + if _, err := os.Stat(file); !os.IsNotExist(err) { + uf, err = getUnitFromFile(file) + if err != nil { + return nil, fmt.Errorf("failed getting Unit(%s) from file: %v", file, err) + } + } else { + // Otherwise (if the unit file does not exist), check if the + // name appears to be an instance unit, and if so, check for + // a corresponding template unit in the Registry or disk + uf, err = getUnitFileFromTemplate(file) + if err != nil { + return nil, err + } + + // If we found a template unit, create a near-identical instance unit in + // the Registry - same unit file as the template, but different name + } + + log.Debugf("Found Unit(%s)", name) + return uf, nil +} + // getUnitFromFile attempts to load a Unit from a given filename // It returns the Unit or nil, and any error encountered func getUnitFromFile(file string) (*unit.UnitFile, error) { @@ -497,6 +527,48 @@ func getUnitFromFile(file string) (*unit.UnitFile, error) { return unit.NewUnitFile(string(out)) } +// getUnitFileFromTemplate checks if the name appears to be an instance unit +// and gets its corresponding template unit from the registry or local disk +// It returns the Unit or nil; and any error encountered +func getUnitFileFromTemplate(arg string) (*unit.UnitFile, error) { + var uf *unit.UnitFile + name := unitNameMangle(arg) + + // Check if the name appears to be an instance unit, and if so, + // check for a corresponding template unit in the Registry + uni := unit.NewUnitNameInfo(name) + if uni == nil { + return nil, fmt.Errorf("error extracting information from unit name %s", name) + } else if !uni.IsInstance() { + return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name) + } + + tmpl, err := cAPI.Unit(uni.Template) + if err != nil { + return nil, fmt.Errorf("error retrieving template Unit(%s) from Registry: %v", uni.Template, err) + } + + if tmpl != nil { + warnOnDifferentLocalUnit(arg, tmpl) + uf = schema.MapSchemaUnitOptionsToUnitFile(tmpl.Options) + log.Debugf("Template Unit(%s) found in registry", uni.Template) + } else { + // Finally, if we could not find a template unit in the Registry, + // check the local disk for one instead + file := path.Join(path.Dir(arg), uni.Template) + if _, err := os.Stat(file); os.IsNotExist(err) { + return nil, fmt.Errorf("unable to find Unit(%s) or template Unit(%s) in Registry or on filesystem", name, uni.Template) + } + + uf, err = getUnitFromFile(file) + if err != nil { + return nil, fmt.Errorf("failed getting template Unit(%s) from file: %v", uni.Template, err) + } + } + + return uf, nil +} + func getTunnelFlag() string { tun := globalFlags.Tunnel if tun != "" && !strings.Contains(tun, ":") { @@ -599,8 +671,6 @@ func lazyCreateUnits(args []string) error { errchan := make(chan error) var wg sync.WaitGroup for _, arg := range args { - // TODO(jonboulle): this loop is getting too unwieldy; factor it out - arg = maybeAppendDefaultUnitType(arg) name := unitNameMangle(arg) @@ -615,45 +685,12 @@ func lazyCreateUnits(args []string) error { continue } - var uf *unit.UnitFile - // Failing that, assume the name references a local unit file on disk, and attempt to load that, if it exists - // TODO(mischief): consolidate these two near-identical codepaths - if _, err := os.Stat(arg); !os.IsNotExist(err) { - uf, err = getUnitFromFile(arg) - if err != nil { - return fmt.Errorf("failed getting Unit(%s) from file: %v", arg, err) - } - } else { - // Otherwise (if the unit file does not exist), check if the name appears to be an instance unit, - // and if so, check for a corresponding template unit in the Registry - uni := unit.NewUnitNameInfo(name) - if uni == nil { - return fmt.Errorf("error extracting information from unit name %s", name) - } else if !uni.IsInstance() { - return fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name) - } - tmpl, err := cAPI.Unit(uni.Template) - if err != nil { - return fmt.Errorf("error retrieving template Unit(%s) from Registry: %v", uni.Template, err) - } - - // Finally, if we could not find a template unit in the Registry, check the local disk for one instead - if tmpl == nil { - file := path.Join(path.Dir(arg), uni.Template) - if _, err := os.Stat(file); os.IsNotExist(err) { - return fmt.Errorf("unable to find Unit(%s) or template Unit(%s) in Registry or on filesystem", name, uni.Template) - } - uf, err = getUnitFromFile(file) - if err != nil { - return fmt.Errorf("failed getting template Unit(%s) from file: %v", uni.Template, err) - } - } else { - warnOnDifferentLocalUnit(arg, tmpl) - uf = schema.MapSchemaUnitOptionsToUnitFile(tmpl.Options) - } - - // If we found a template unit, create a near-identical instance unit in - // the Registry - same unit file as the template, but different name + // Assume that the name references a local unit file on + // disk or if it is an instance unit and if so get its + // corresponding unit + uf, err := getUnitFile(arg) + if err != nil { + return err } _, err = createUnit(name, uf) From 42973612e5b56987366bb63a9f548748dd0f3016 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 17 Feb 2016 16:15:10 +0100 Subject: [PATCH 19/26] fleetctl: inline getBlockAttempts() in tryWaitForUnitStates() calls --- fleetctl/load.go | 4 +--- fleetctl/start.go | 4 +--- fleetctl/stop.go | 4 +--- fleetctl/unload.go | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/fleetctl/load.go b/fleetctl/load.go index 8659a1498..0f7b0923c 100644 --- a/fleetctl/load.go +++ b/fleetctl/load.go @@ -46,8 +46,6 @@ func init() { } func runLoadUnits(args []string) (exit int) { - attempts := getBlockAttempts() - if err := lazyCreateUnits(args); err != nil { stderr("Error creating units: %v", err) return 1 @@ -68,7 +66,7 @@ func runLoadUnits(args []string) (exit int) { } } - exit = tryWaitForUnitStates(loading, "load", job.JobStateLoaded, attempts, os.Stdout) + exit = tryWaitForUnitStates(loading, "load", job.JobStateLoaded, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/start.go b/fleetctl/start.go index 17c8a53da..2ded29539 100644 --- a/fleetctl/start.go +++ b/fleetctl/start.go @@ -54,8 +54,6 @@ func init() { } func runStartUnit(args []string) (exit int) { - attempts := getBlockAttempts() - if err := lazyCreateUnits(args); err != nil { stderr("Error creating units: %v", err) return 1 @@ -76,7 +74,7 @@ func runStartUnit(args []string) (exit int) { } } - exit = tryWaitForUnitStates(starting, "start", job.JobStateLaunched, attempts, os.Stdout) + exit = tryWaitForUnitStates(starting, "start", job.JobStateLaunched, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/stop.go b/fleetctl/stop.go index c63904107..941b33d7b 100644 --- a/fleetctl/stop.go +++ b/fleetctl/stop.go @@ -52,8 +52,6 @@ func init() { } func runStopUnit(args []string) (exit int) { - attempts := getBlockAttempts() - units, err := findUnits(args) if err != nil { stderr("%v", err) @@ -81,7 +79,7 @@ func runStopUnit(args []string) (exit int) { } } - exit = tryWaitForUnitStates(stopping, "stop", job.JobStateLoaded, attempts, os.Stdout) + exit = tryWaitForUnitStates(stopping, "stop", job.JobStateLoaded, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/unload.go b/fleetctl/unload.go index c9700496a..48eb833fc 100644 --- a/fleetctl/unload.go +++ b/fleetctl/unload.go @@ -36,8 +36,6 @@ func init() { } func runUnloadUnit(args []string) (exit int) { - attempts := getBlockAttempts() - units, err := findUnits(args) if err != nil { stderr("%v", err) @@ -62,7 +60,7 @@ func runUnloadUnit(args []string) (exit int) { } } - exit = tryWaitForUnitStates(wait, "unload", job.JobStateInactive, attempts, os.Stdout) + exit = tryWaitForUnitStates(wait, "unload", job.JobStateInactive, getBlockAttempts(), os.Stdout) return } From c7510c386e69e8cbdf9b90762bc75032dd09612c Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:45:04 +0100 Subject: [PATCH 20/26] fleetctl: improve code comment about getUnitFileFromTemplate() Improve code comments about getUnitFileFromTemplate() and kill some other useless code comments. --- fleetctl/fleetctl.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 3b99f7f2a..67fab4562 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -487,7 +487,7 @@ func getUnitFile(file string) (*unit.UnitFile, error) { var uf *unit.UnitFile name := unitNameMangle(file) - log.Debugf("Looking up for Unit(%s) or its corresponding template", name) + log.Debugf("Looking for Unit(%s) or its corresponding template", name) // Assume that the file references a local unit file on disk and // attempt to load it, if it exists @@ -499,14 +499,14 @@ func getUnitFile(file string) (*unit.UnitFile, error) { } else { // Otherwise (if the unit file does not exist), check if the // name appears to be an instance unit, and if so, check for - // a corresponding template unit in the Registry or disk + // a corresponding template unit in the Registry or disk. + // If we found a template unit, later we create a + // near-identical instance unit in the Registry - same + // unit file as the template, but different name uf, err = getUnitFileFromTemplate(file) if err != nil { return nil, err } - - // If we found a template unit, create a near-identical instance unit in - // the Registry - same unit file as the template, but different name } log.Debugf("Found Unit(%s)", name) @@ -792,12 +792,10 @@ func getBlockAttempts() int { // By default we wait forever var attempts int = -1 - // Up to BlockAttempts if sharedFlags.BlockAttempts > 0 { attempts = sharedFlags.BlockAttempts } - // NoBlock we do not wait if sharedFlags.NoBlock { attempts = 0 } From bd57a752423c8d0f8d4cf2f2c45f965341ca96cf Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Sat, 20 Feb 2016 10:22:40 +0100 Subject: [PATCH 21/26] fleetctl: getBlockAttempts() standarise on negative meaning do not block --- fleetctl/fleetctl.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 67fab4562..9a7d76a4a 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -785,19 +785,19 @@ func setTargetStateOfUnits(units []string, state job.JobState) ([]*schema.Unit, // getBlockAttempts gets the correct value of how many attempts to try // before giving up on an operation. -// It returns a negative value which means try forever, if zero is -// returned then do not make any attempt, and if a positive value is +// It returns a negative value which means do not block, if zero is +// returned then it means try forever, and if a positive value is // returned then try up to that value func getBlockAttempts() int { // By default we wait forever - var attempts int = -1 + var attempts int = 0 if sharedFlags.BlockAttempts > 0 { attempts = sharedFlags.BlockAttempts } if sharedFlags.NoBlock { - attempts = 0 + attempts = -1 } return attempts @@ -808,14 +808,14 @@ func getBlockAttempts() int { // desired JobState, how many attempts before timing out and a writer // interface. // tryWaitForUnitStates polls each of the indicated units until they -// reach the desired state. If maxAttempts is zero, then it will not +// reach the desired state. If maxAttempts is negative, then it will not // wait, it will assume that all units reached their desired state. -// If maxAttempts is negative tryWaitForUnitStates will retry forever, and +// If maxAttempts is zero tryWaitForUnitStates will retry forever, and // if it is greater than zero, it will retry up to the indicated value. // It returns 0 on success or 1 on errors. func tryWaitForUnitStates(units []string, state string, js job.JobState, maxAttempts int, out io.Writer) (ret int) { // We do not wait just assume we reached the desired state - if maxAttempts == 0 { + if maxAttempts <= -1 { for _, name := range units { stdout("Triggered unit %s %s", name, state) } From 4d77bd1b86407dbe1ef5e4e58f0cd9e1f7e2fd5b Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:48:05 +0100 Subject: [PATCH 22/26] fleetctl:test: push tests for getBlockAttempts() --- fleetctl/fleetctl_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/fleetctl/fleetctl_test.go b/fleetctl/fleetctl_test.go index 0d50de40c..643292809 100644 --- a/fleetctl/fleetctl_test.go +++ b/fleetctl/fleetctl_test.go @@ -181,6 +181,37 @@ func TestUnitNameMangle(t *testing.T) { } } +func TestGetBlockAttempts(t *testing.T) { + oldNoBlock := sharedFlags.NoBlock + oldBlockAttempts := sharedFlags.BlockAttempts + + defer func() { + sharedFlags.NoBlock = oldNoBlock + sharedFlags.BlockAttempts = oldBlockAttempts + }() + + var blocktests = []struct { + noBlock bool + blockAttempts int + expected int + }{ + {true, 0, -1}, + {true, -1, -1}, + {true, 9999, -1}, + {false, 0, 0}, + {false, -1, 0}, + {false, 9999, 9999}, + } + + for _, tt := range blocktests { + sharedFlags.NoBlock = tt.noBlock + sharedFlags.BlockAttempts = tt.blockAttempts + if n := getBlockAttempts(); n != tt.expected { + t.Errorf("got %d, want %d", n, tt.expected) + } + } +} + func newUnitFile(t *testing.T, contents string) *unit.UnitFile { uf, err := unit.NewUnitFile(contents) if err != nil { From f1c438fd1018ea1e606f79e88a34afe36acf67c9 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:51:05 +0100 Subject: [PATCH 23/26] fleetctl: improve getUnitFile() error handling and add some documentation Improve getUnitFile() error handling and add some documentation to getUnitFileFromTemplate() --- fleetctl/fleetctl.go | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 9a7d76a4a..8064fd20c 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -498,14 +498,20 @@ func getUnitFile(file string) (*unit.UnitFile, error) { } } else { // Otherwise (if the unit file does not exist), check if the - // name appears to be an instance unit, and if so, check for - // a corresponding template unit in the Registry or disk. + // name appears to be an instance of a template unit + info, err := getUnitInstanceInfo(name) + if err != nil { + return nil, fmt.Errorf("failed getting template Unit(%s) info: %v", name, err) + } + + // If it is an instance check for a corresponding template + // unit in the Registry or disk. // If we found a template unit, later we create a // near-identical instance unit in the Registry - same // unit file as the template, but different name - uf, err = getUnitFileFromTemplate(file) + uf, err = getUnitFileFromTemplate(file, info) if err != nil { - return nil, err + return nil, fmt.Errorf("failed getting Unit(%s) from template: %v", file, err) } } @@ -527,25 +533,27 @@ func getUnitFromFile(file string) (*unit.UnitFile, error) { return unit.NewUnitFile(string(out)) } -// getUnitFileFromTemplate checks if the name appears to be an instance unit -// and gets its corresponding template unit from the registry or local disk -// It returns the Unit or nil; and any error encountered -func getUnitFileFromTemplate(arg string) (*unit.UnitFile, error) { - var uf *unit.UnitFile - name := unitNameMangle(arg) - - // Check if the name appears to be an instance unit, and if so, - // check for a corresponding template unit in the Registry +func getUnitInstanceInfo(name string) (*unit.UnitNameInfo, error) { + // Check if the name appears to be an instance unit uni := unit.NewUnitNameInfo(name) if uni == nil { - return nil, fmt.Errorf("error extracting information from unit name %s", name) + return nil, errors.New("unable to extract information from unit name") } else if !uni.IsInstance() { - return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name) + return nil, errors.New("Not an instance of a template unit") } + return uni, nil +} + +// getUnitFileFromTemplate checks if the name appears to be an instance unit +// and gets its corresponding template unit from the registry or local disk +// It returns the Unit or nil; and any error encountered +func getUnitFileFromTemplate(arg string, uni *unit.UnitNameInfo) (*unit.UnitFile, error) { + var uf *unit.UnitFile + tmpl, err := cAPI.Unit(uni.Template) if err != nil { - return nil, fmt.Errorf("error retrieving template Unit(%s) from Registry: %v", uni.Template, err) + return nil, fmt.Errorf("unable to retrieve Unit(%s) from Registry: %v", uni.Template, err) } if tmpl != nil { @@ -557,12 +565,12 @@ func getUnitFileFromTemplate(arg string) (*unit.UnitFile, error) { // check the local disk for one instead file := path.Join(path.Dir(arg), uni.Template) if _, err := os.Stat(file); os.IsNotExist(err) { - return nil, fmt.Errorf("unable to find Unit(%s) or template Unit(%s) in Registry or on filesystem", name, uni.Template) + return nil, fmt.Errorf("unable to find Unit(%s) on filesystem", file) } uf, err = getUnitFromFile(file) if err != nil { - return nil, fmt.Errorf("failed getting template Unit(%s) from file: %v", uni.Template, err) + return nil, fmt.Errorf("unable to load Unit(%s) from file: %v", file, err) } } From 8881d3fdb5d41f9c5e1309d6107391be0ba8016e Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:53:44 +0100 Subject: [PATCH 24/26] fleetctl: for errors indicate that getUnitFileFromTemplate() tried both registry and filesystem --- fleetctl/fleetctl.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 8064fd20c..5e7d2e8c4 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -501,7 +501,7 @@ func getUnitFile(file string) (*unit.UnitFile, error) { // name appears to be an instance of a template unit info, err := getUnitInstanceInfo(name) if err != nil { - return nil, fmt.Errorf("failed getting template Unit(%s) info: %v", name, err) + return nil, fmt.Errorf("failed getting Unit(%s) info: %v", name, err) } // If it is an instance check for a corresponding template @@ -539,7 +539,7 @@ func getUnitInstanceInfo(name string) (*unit.UnitNameInfo, error) { if uni == nil { return nil, errors.New("unable to extract information from unit name") } else if !uni.IsInstance() { - return nil, errors.New("Not an instance of a template unit") + return nil, errors.New("unable to find Unit in Registry or on filesystem") } return uni, nil @@ -565,7 +565,7 @@ func getUnitFileFromTemplate(arg string, uni *unit.UnitNameInfo) (*unit.UnitFile // check the local disk for one instead file := path.Join(path.Dir(arg), uni.Template) if _, err := os.Stat(file); os.IsNotExist(err) { - return nil, fmt.Errorf("unable to find Unit(%s) on filesystem", file) + return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", uni.Template) } uf, err = getUnitFromFile(file) From 60a7c54235031cf14161b8d4a1354d30453eba82 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:07:21 +0100 Subject: [PATCH 25/26] fleetctl: improve getUnitFile() and getUnitFileFromTemplate() Godoc --- fleetctl/fleetctl.go | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 5e7d2e8c4..8d4954088 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -483,6 +483,13 @@ func getChecker() *ssh.HostKeyChecker { return ssh.NewHostKeyChecker(keyFile) } +// getUnitFile attempts to get a UnitFile configuration +// It takes a unit file name as a parameter and tries first to lookup +// the unit from the local disk. If it fails, it checks if the provided +// file name may reference an instance of a template unit, if so, it +// tries to get the template configuration either from the registry or +// the local disk. +// It returns a UnitFile configuration or nil; and any error ecountered func getUnitFile(file string) (*unit.UnitFile, error) { var uf *unit.UnitFile name := unitNameMangle(file) @@ -509,7 +516,7 @@ func getUnitFile(file string) (*unit.UnitFile, error) { // If we found a template unit, later we create a // near-identical instance unit in the Registry - same // unit file as the template, but different name - uf, err = getUnitFileFromTemplate(file, info) + uf, err = getUnitFileFromTemplate(info, file) if err != nil { return nil, fmt.Errorf("failed getting Unit(%s) from template: %v", file, err) } @@ -545,10 +552,11 @@ func getUnitInstanceInfo(name string) (*unit.UnitNameInfo, error) { return uni, nil } -// getUnitFileFromTemplate checks if the name appears to be an instance unit -// and gets its corresponding template unit from the registry or local disk +// getUnitFileFromTemplate attempts to get a Unit from a template unit that +// is either in the registry or on the file system +// It takes two arguments, the template information and the unit file name // It returns the Unit or nil; and any error encountered -func getUnitFileFromTemplate(arg string, uni *unit.UnitNameInfo) (*unit.UnitFile, error) { +func getUnitFileFromTemplate(uni *unit.UnitNameInfo, fileName string) (*unit.UnitFile, error) { var uf *unit.UnitFile tmpl, err := cAPI.Unit(uni.Template) @@ -557,20 +565,20 @@ func getUnitFileFromTemplate(arg string, uni *unit.UnitNameInfo) (*unit.UnitFile } if tmpl != nil { - warnOnDifferentLocalUnit(arg, tmpl) + warnOnDifferentLocalUnit(fileName, tmpl) uf = schema.MapSchemaUnitOptionsToUnitFile(tmpl.Options) log.Debugf("Template Unit(%s) found in registry", uni.Template) } else { // Finally, if we could not find a template unit in the Registry, // check the local disk for one instead - file := path.Join(path.Dir(arg), uni.Template) - if _, err := os.Stat(file); os.IsNotExist(err) { + filePath := path.Join(path.Dir(fileName), uni.Template) + if _, err := os.Stat(filePath); os.IsNotExist(err) { return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", uni.Template) } - uf, err = getUnitFromFile(file) + uf, err = getUnitFromFile(filePath) if err != nil { - return nil, fmt.Errorf("unable to load Unit(%s) from file: %v", file, err) + return nil, fmt.Errorf("unable to load Unit(%s) from file: %v", filePath, err) } } From cab3099d81ba26115e80bb8d9f99c62be26431cb Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 2 Mar 2016 13:17:33 +0100 Subject: [PATCH 26/26] fleetctl: just inline getUnitInstanceInfo() and restore previous error messages --- fleetctl/fleetctl.go | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 8d4954088..d16863635 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -506,9 +506,11 @@ func getUnitFile(file string) (*unit.UnitFile, error) { } else { // Otherwise (if the unit file does not exist), check if the // name appears to be an instance of a template unit - info, err := getUnitInstanceInfo(name) + info := unit.NewUnitNameInfo(name) if err != nil { - return nil, fmt.Errorf("failed getting Unit(%s) info: %v", name, err) + return nil, fmt.Errorf("error extracting information from unit name %s", name) + } else if !info.IsInstance() { + return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name) } // If it is an instance check for a corresponding template @@ -540,18 +542,6 @@ func getUnitFromFile(file string) (*unit.UnitFile, error) { return unit.NewUnitFile(string(out)) } -func getUnitInstanceInfo(name string) (*unit.UnitNameInfo, error) { - // Check if the name appears to be an instance unit - uni := unit.NewUnitNameInfo(name) - if uni == nil { - return nil, errors.New("unable to extract information from unit name") - } else if !uni.IsInstance() { - return nil, errors.New("unable to find Unit in Registry or on filesystem") - } - - return uni, nil -} - // getUnitFileFromTemplate attempts to get a Unit from a template unit that // is either in the registry or on the file system // It takes two arguments, the template information and the unit file name