diff --git a/.travis.yml b/.travis.yml index 8c44610c8..0b3f8a413 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,12 @@ language: go matrix: include: - - go: 1.4.2 + - go: 1.4.3 install: - go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/vet - - go: 1.5.1 + - go: 1.5.3 + - go: 1.6 script: - ./test diff --git a/Documentation/architecture.md b/Documentation/architecture.md index 6f43e7f55..af93a4874 100644 --- a/Documentation/architecture.md +++ b/Documentation/architecture.md @@ -11,7 +11,7 @@ Every system in the fleet cluster runs a single `fleetd` daemon. Each daemon enc - The engine uses a _lease model_ to enforce that only one engine is running at a time. Every time a reconciliation is due, an engine will attempt to take a lease on etcd. If the lease succeeds, the reconciliation proceeds; otherwise, that engine will remain idle until the next reconciliation period begins. - The engine uses a simplistic "least-loaded" scheduling algorithm: when considering where to schedule a given unit, preference is given to agents running the smallest number of units. -The reconciliation loop of the engine can be disabled with the `--disable-engine` flag. This means that +The reconciliation loop of the engine can be disabled with the `disable_engine` config flag. This means that this `fleetd` daemon will *never* become a cluster leader. If all running daemons have this setting, your cluster is dead; i.e. no jobs will be scheduled. Use with care. @@ -50,18 +50,28 @@ A UnitState object represents the state of a Unit in the fleet engine. A UnitSta ## Preview Release -Current releases of fleet don't currently perform any authentication or authorization for submitted units. This means that any client that can access your etcd cluster can potentially run arbitrary code on many of your machines very easily. +Current releases of fleet don't currently perform any authentication or authorization for submitted units. 
This means that any client that can access your etcd cluster can potentially run arbitrary code on many of your machines very easily, thus it is strongly recommended to enable [TLS authentication][etcd-security] on the etcd side, set proper file permissions to the keypair on the host and [configure fleet][fleet-tls] to use keypair. ## Securing etcd You should avoid public access to etcd and instead run fleet [from your local laptop][using-the-client] with the `--tunnel` flag to run commands over an SSH tunnel. You can alias this flag for easier usage: `alias fleetctl=fleetctl --tunnel 10.10.10.10` - or use the environment variable `FLEETCTL_TUNNEL`. -## Other Notes +## Securing fleetd -Since it interacts directly with systemd over D-Bus, the fleetd daemon must be run with elevated privileges (i.e. as root) in order to perform operations like starting and stopping services. From the [systemd D-Bus documentation][systemd-dbus]: +It is also recommended to run fleetd under separate `fleet` user and group, and set the permissions of the fleetd API's listening Unix socket to `0660`. This will require local user to be in `fleet` group to perform an action with fleetd. Since the fleet daemon uses [D-Bus][d-bus] to communicate with systemd it is necessary to create a [`polkit(8)`][polkit] rule to allow fleetd to communicate with systemd: -> In contrast to most of the other services of the systemd suite PID 1 does not use PolicyKit for controlling access to privileged operations, but relies exclusively on the low-level D-Bus policy language. (This is done in order to avoid a cyclic dependency between PolicyKit and systemd/PID 1.) This means that sensitive operations exposed by PID 1 on the bus are generally not available to unprivileged processes directly. 
+```js +polkit.addRule(function(action, subject) { + if (action.id.indexOf("org.freedesktop.systemd1.") == 0 && + subject.user == "fleet") { + return polkit.Result.YES; + } +}); +``` +[etcd-security]: https://github.com/coreos/etcd/blob/master/Documentation/security.md +[d-bus]: https://www.freedesktop.org/wiki/Software/dbus/ +[fleet-tls]: deployment-and-configuration.md#tls-authentication +[polkit]: https://www.freedesktop.org/software/polkit/docs/latest/polkit.8.html [states documentation]: states.md [using-the-client]: using-the-client.md#get-up-and-running -[systemd-dbus]: http://www.freedesktop.org/wiki/Software/systemd/dbus/ diff --git a/Documentation/deployment-and-configuration.md b/Documentation/deployment-and-configuration.md index e0f26803d..f579b0c96 100644 --- a/Documentation/deployment-and-configuration.md +++ b/Documentation/deployment-and-configuration.md @@ -8,7 +8,45 @@ Deploying `fleet` on CoreOS is even simpler: just run `systemctl start fleet`. T Each `fleetd` daemon must be configured to talk to the same [etcd cluster][etcd]. By default, the `fleetd` daemon will connect to either http://127.0.0.1:2379 or http://127.0.0.1:4001, depending on which endpoint responds. Refer to the configuration documentation below for customization help. -`fleet` requires etcd be of version 0.3.0+. +`fleet` requires etcd be of version 0.3.0+ but it is recommended to use etcd 2.0.0+ which supports [TLS authentication][etcd-security]. + +### TLS Authentication + +If your etcd cluster has [TLS authentication][etcd-security] enabled, you will need to configure fleet to use an appropriate TLS keypair. 
The examples below show how to achieve this: + +#### Using systemd Drop-Ins + +```ini +[Service] +Environment="FLEET_ETCD_CAFILE=/etc/ssl/etcd/ca.pem" +Environment="FLEET_ETCD_CERTFILE=/etc/ssl/etcd/client.pem" +Environment="FLEET_ETCD_KEYFILE=/etc/ssl/etcd/client-key.pem" +Environment="FLEET_ETCD_SERVERS=https://172.16.0.101:2379,https://172.16.0.102:2379,https://172.16.0.103:2379" +Environment="FLEET_METADATA=hostname=server1" +Environment="FLEET_PUBLIC_IP=172.16.0.101" +``` + +#### Using CoreOS Cloud Config + +```yaml +#cloud-config + +coreos: + fleet: + etcd_servers: "https://192.0.2.12:2379" + etcd_cafile: /etc/ssl/etcd/ca.pem + etcd_certfile: /etc/ssl/etcd/client.pem + etcd_keyfile: /etc/ssl/etcd/client-key.pem +``` + +#### Using fleet configuration file + +```ini +etcd_servers=["https://192.0.2.12:2379"] +etcd_cafile=/etc/ssl/etcd/ca.pem +etcd_certfile=/etc/ssl/etcd/client.pem +etcd_keyfile=/etc/ssl/etcd/client-key.pem +``` ## systemd @@ -20,15 +58,15 @@ The `fleetctl` client tool uses SSH to interact with a fleet cluster. This means Authorizing a public SSH key is typically as easy as appending it to the user's `~/.ssh/authorized_keys` file. This may not be true on your systemd, though. If running CoreOS, use the built-in `update-ssh-keys` utility - it helps manage multiple authorized keys. -To make things incredibly easy, included in the [fleet source][fleetctl-inject-ssh] is a script that will distribute SSH keys across a fleet cluster running on CoreOS. Simply pipe the contents of a public SSH key into the script: +To make things incredibly easy, included in the [fleet source][fleet-inject-ssh] is a script that will distribute SSH keys across a fleet cluster running on CoreOS. Simply pipe the contents of a public SSH key into the script: -``` +```sh cat ~/.ssh/id_rsa.pub | ./fleetctl-inject-ssh.sh simon ``` All but the first argument to `fleetctl-inject-ssh.sh` are passed directly to `fleetctl`. 
-``` +```sh cat ~/.ssh/id_rsa.pub | ./fleetctl-inject-ssh.sh simon --tunnel 19.12.0.33 ``` @@ -40,14 +78,14 @@ The configuration of these interfaces is managed through a [systemd socket unit] CoreOS ships a socket unit for fleet (`fleet.socket`) which binds to a Unix domain socket, `/var/run/fleet.sock`. Unix socket is accessible using tool such as curl (v7.40 or greater): `curl --unix-socket /var/run/fleet.sock http:/fleet/v1/units`. To serve the fleet API over a network address, simply extend or replace this socket unit. -For example, writing the following [drop-in] to `/etc/systemd/system/fleet.socket.d/30-ListenStream.conf` would enable fleet to be reached over the local port `49153` in addition to `/var/run/fleet.sock`: +For example, writing the following [drop-in][drop-in] to `/etc/systemd/system/fleet.socket.d/30-ListenStream.conf` would enable fleet to be reached over the local port `49153` in addition to `/var/run/fleet.sock`: -``` +```ini [Socket] ListenStream=127.0.0.1:49153 ``` -After you've written the file, call `systemctl daemon-reload` to load the new [drop-in], followed by `systemctl stop fleet.service; systemctl restart fleet.socket; systemctl start fleet.service`. +After you've written the file, call `systemctl daemon-reload` to load the new [drop-in][drop-in], followed by `systemctl stop fleet.service; systemctl restart fleet.socket; systemctl start fleet.service`. Once the socket is running, the fleet API will be available at `http://${ListenStream}/fleet/v1`, where `${ListenStream}` is the value of the `ListenStream` option used in your socket file. This endpoint is accessible directly using tools such as curl and wget, or you can use fleetctl like so: `fleetctl --endpoint http://${ListenStream} `. @@ -67,7 +105,7 @@ fleet will look at `/etc/fleet/fleet.conf` for this config file by default. The Environment variables may also provide configuration options. 
Options provided in an environment variable will override the corresponding option provided in a config file. To use an environment variable, simply prefix the name of a given option with `FLEET_`, while uppercasing the rest of the name. For example, to set the `etcd_servers` option to 'http://192.0.2.12:2379' when running the fleetd binary: -``` +```sh $ FLEET_ETCD_SERVERS=http://192.0.2.12:2379 /usr/bin/fleetd ``` @@ -92,7 +130,7 @@ Amount of time in seconds to allow a single etcd request before considering it f Default: 1.0 -#### etcd_cafile, etcd_keyfile, etcd_certfile +#### etcd_cafile, etcd_keyfile, etcd_certfile Provide TLS configuration when SSL certificate authentication is enabled in etcd endpoints @@ -115,23 +153,31 @@ Default: "" Comma-delimited key/value pairs that are published with the local to the fleet registry. This data can be used directly by a client of fleet to make scheduling decisions. An example set of metadata could look like: - metadata="region=us-west,az=us-west-1" - metadata='region=us-west,az=us-west-1' - metadata=region=us-west,az=us-west-1 +```ini +metadata="region=us-west,az=us-west-1" +metadata='region=us-west,az=us-west-1' +metadata=region=us-west,az=us-west-1 +``` The value of the metadata option should conform to one of these three forms: - - metadata="STRING" - metadata='STRING' - metadata=STRING + +```ini +metadata="STRING" +metadata='STRING' +metadata=STRING +``` ...while STRING is one of: - yyy[,yyy[,yyy...]] +```ini +yyy[,yyy[,yyy...]] +``` ...and yyy is one of: - key=value +```ini +key=value +``` Space and tab characters will be stripped around the equals sign and around each comma. If the same key is defined more than once, the last value overwrites the previous value(s). 
@@ -149,9 +195,30 @@ Interval in seconds at which the engine should reconcile the cluster schedule in Default: 2 -[etcd]: https://github.com/coreos/docs/blob/master/etcd/getting-started-with-etcd.md +#### token_limit + +Maximum number of entries per page returned from API requests. + +Default: "100" + +### disable_engine + +Disable the engine entirely, use with care. You can find more info about this option in [fleet scaling doc][fleet-scale]. + +Default: false + +### disable_watches + +Disable the use of etcd watches. Increases scheduling latency. You can find more info about this option in [fleet scaling doc][fleet-scale]. + +Default: false + [api-doc]: api-v1.md -[fleetctl-inject-ssh]: /scripts/fleetctl-inject-ssh.sh +[config]: /fleet.conf.sample +[etcd]: https://github.com/coreos/docs/blob/master/etcd/getting-started-with-etcd.md +[etcd-security]: https://github.com/coreos/etcd/blob/master/Documentation/security.md +[fleet-inject-ssh]: /scripts/fleetctl-inject-ssh.sh +[fleet-scale]: fleet-scaling.md#implemented-quick-wins [socket-unit]: http://www.freedesktop.org/software/systemd/man/systemd.socket.html [config]: /fleet.conf.sample [drop-in]: https://github.com/coreos/docs/blob/master/os/using-systemd-drop-in-units.md diff --git a/Documentation/fleet-scaling.md b/Documentation/fleet-scaling.md index 0a7554a76..a501bbc31 100644 --- a/Documentation/fleet-scaling.md +++ b/Documentation/fleet-scaling.md @@ -39,13 +39,13 @@ RPCs between the engine and agent. this is an expensive operation. The fewer nodes that are engaged in this election, the better. Possible downside is that if there isn't a leader at all, the cluster is inoperable. However the (usually) 5 machines running - etcd are also a single point of failure. *See the `--disable-engine` flag.* + etcd are also a single point of failure. *See the `disable_engine` config flag.* * Making some defaults exported and allow them to be overridden. 
For instance fleet's tokenLimit controls how many Units are listed per "page". *See the - `--token-limit` flag.* + `token_limit` config flag.* * Removing watches from fleet: By removing the watches from fleet we stop the entire cluster from walking up whenever a new job is to be scheduled. The downside of this change is that fleet's responsiveness is lower. - *See the `--disable-watches` flag.* + *See the `disable_watches` config flag.* diff --git a/build b/build index 7ac5497d1..3c32a330b 100755 --- a/build +++ b/build @@ -1,41 +1,24 @@ #!/bin/bash -e -# The -X format changed from go1.4 -> go1.5 -function go_linker_dashX { - local version=$(go version) - local regex="go([0-9]+).([0-9]+)." - if [[ $version =~ $regex ]]; then - if [ ${BASH_REMATCH[1]} -eq "1" -a ${BASH_REMATCH[2]} -le "4" ]; then - echo "$1 \"$2\"" - else - echo "$1=$2" - fi - else - echo "could not determine Go version" - exit 1 - fi -} +CDIR=$(cd `dirname $0` && pwd) +cd $CDIR ORG_PATH="github.com/coreos" REPO_PATH="${ORG_PATH}/fleet" VERSION=$(git describe --dirty) -GLDFLAGS="-X $(go_linker_dashX github.com/coreos/fleet/version.Version ${VERSION})" + +source build-env if [ ! -h gopath/src/${REPO_PATH} ]; then - mkdir -p gopath/src/${ORG_PATH} - ln -s ../../../.. gopath/src/${REPO_PATH} || exit 255 + mkdir -p gopath/src/${ORG_PATH} + ln -s ../../../.. gopath/src/${REPO_PATH} || exit 255 fi -export GOBIN=${PWD}/bin -export GOPATH=${PWD}/gopath - -eval $(go env) - if [ ${GOOS} = "linux" ]; then - echo "Building fleetd..." - CGO_ENABLED=0 go build -o bin/fleetd -a -installsuffix netgo -ldflags "${GLDFLAGS}" ${REPO_PATH}/fleetd + echo "Building fleetd..." + CGO_ENABLED=0 go build -o bin/fleetd -a -installsuffix netgo -ldflags "${GLDFLAGS}" ${REPO_PATH}/fleetd else - echo "Not on Linux - skipping fleetd build" + echo "Not on Linux - skipping fleetd build" fi echo "Building fleetctl..." 
diff --git a/build-docker b/build-docker index f120c2f77..cfc926389 100755 --- a/build-docker +++ b/build-docker @@ -1,3 +1,5 @@ #!/bin/bash -e -docker run --rm -v $PWD:/opt/fleet -u $(id -u):$(id -g) google/golang:1.4 /bin/bash -c "cd /opt/fleet && ./build" +CDIR=$(cd `dirname $0` && pwd) + +docker run --rm -v $CDIR:/opt/fleet -u $(id -u):$(id -g) google/golang:1.4 /bin/bash -c "cd /opt/fleet && ./build" diff --git a/build-env b/build-env new file mode 100755 index 000000000..9fa387f3c --- /dev/null +++ b/build-env @@ -0,0 +1,23 @@ +# The -X format changed from go1.4 -> go1.5 +function go_linker_dashX { + local version=$(go version) + local regex="go([0-9]+).([0-9]+)." + if [[ $version =~ $regex ]]; then + if [ ${BASH_REMATCH[1]} -eq "1" -a ${BASH_REMATCH[2]} -le "4" ]; then + echo "$1 \"$2\"" + else + echo "$1=$2" + fi + else + echo "could not determine Go version" + exit 1 + fi +} + +export GOBIN=${PWD}/bin +export GOPATH=${PWD}/gopath +export GLDFLAGS="-X $(go_linker_dashX github.com/coreos/fleet/version.Version ${VERSION})" +eval $(go env) +export PATH="${GOROOT}/bin:${PATH}" +export FLEETD_BIN="$(pwd)/bin/fleetd" +export FLEETCTL_BIN="$(pwd)/bin/fleetctl" diff --git a/client/http.go b/client/http.go index 3714f356f..63027c843 100644 --- a/client/http.go +++ b/client/http.go @@ -137,3 +137,7 @@ func is404(err error) bool { googerr, ok := err.(*googleapi.Error) return ok && googerr.Code == http.StatusNotFound } + +func IsErrorUnitNotFound(err error) bool { + return is404(err) +} diff --git a/fleetctl/destroy.go b/fleetctl/destroy.go index 0d319174d..4197e6890 100644 --- a/fleetctl/destroy.go +++ b/fleetctl/destroy.go @@ -16,6 +16,8 @@ package main import ( "time" + + "github.com/coreos/fleet/client" ) var cmdDestroyUnit = &Command{ @@ -42,6 +44,10 @@ func runDestroyUnits(args []string) (exit int) { for _, v := range units { err := cAPI.DestroyUnit(v.Name) if err != nil { + // Ignore 'Unit does not exist' error + if client.IsErrorUnitNotFound(err) { + continue + 
} stderr("Error destroying units: %v", err) exit = 1 continue @@ -71,7 +77,7 @@ func runDestroyUnits(args []string) (exit int) { if u == nil { break } - time.Sleep(500 * time.Millisecond) + time.Sleep(defaultSleepTime) } } diff --git a/fleetctl/destroy_test.go b/fleetctl/destroy_test.go new file mode 100644 index 000000000..e0e68c95a --- /dev/null +++ b/fleetctl/destroy_test.go @@ -0,0 +1,86 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "sync" + "testing" +) + +func doDestroyUnits(r commandTestResults, errchan chan error) { + exit := runDestroyUnits(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) + } + for _, destroyedUnit := range r.units { + u, _ := cAPI.Unit(destroyedUnit) + if u != nil { + errchan <- fmt.Errorf("%s: unit %s was not destroyed as requested", r.description, destroyedUnit) + } + } +} + +// TestRunDestroyUnits checks for correct unit destruction +func TestRunDestroyUnits(t *testing.T) { + unitPrefix := "j" + results := []commandTestResults{ + { + "destroy available units", + []string{"j1", "j2", "j3", "j4", "j5"}, + 0, + }, + { + "destroy non-available units", + []string{"y1", "y2"}, + 0, + }, + { + "attempt to destroy available and non-available units", + []string{"y1", "y2", "y3", "y4", "j1", "j2", "j3", "j4", "j5", "y0"}, + 0, + }, + } + + // Check with two goroutines 
we don't care we should just get + // the right result. If you happen to inspect this code for + // errors then you probably got hit by a race condition in + // Destroy command that should not happen + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) + + wg.Add(2) + go func() { + defer wg.Done() + doDestroyUnits(r, errchan) + }() + go func() { + defer wg.Done() + doDestroyUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) + } + } +} diff --git a/fleetctl/fleetctl.go b/fleetctl/fleetctl.go index 951536f20..d16863635 100644 --- a/fleetctl/fleetctl.go +++ b/fleetctl/fleetctl.go @@ -59,7 +59,8 @@ recommended to upgrade fleetctl to prevent incompatibility issues. clientDriverAPI = "API" clientDriverEtcd = "etcd" - defaultEndpoint = "unix:///var/run/fleet.sock" + defaultEndpoint = "unix:///var/run/fleet.sock" + defaultSleepTime = 500 * time.Millisecond ) var ( @@ -482,6 +483,51 @@ func getChecker() *ssh.HostKeyChecker { return ssh.NewHostKeyChecker(keyFile) } +// getUnitFile attempts to get a UnitFile configuration +// It takes a unit file name as a parameter and tries first to lookup +// the unit from the local disk. If it fails, it checks if the provided +// file name may reference an instance of a template unit, if so, it +// tries to get the template configuration either from the registry or +// the local disk. 
+// It returns a UnitFile configuration or nil; and any error encountered
+func getUnitFile(file string) (*unit.UnitFile, error) {
+	var uf *unit.UnitFile
+	name := unitNameMangle(file)
+
+	log.Debugf("Looking for Unit(%s) or its corresponding template", name)
+
+	// Assume that the file references a local unit file on disk and
+	// attempt to load it, if it exists
+	if _, err := os.Stat(file); !os.IsNotExist(err) {
+		uf, err = getUnitFromFile(file)
+		if err != nil {
+			return nil, fmt.Errorf("failed getting Unit(%s) from file: %v", file, err)
+		}
+	} else {
+		// Otherwise (if the unit file does not exist), check if the
+		// name appears to be an instance of a template unit
+		info := unit.NewUnitNameInfo(name)
+		if info == nil {
+			return nil, fmt.Errorf("error extracting information from unit name %s", name)
+		} else if !info.IsInstance() {
+			return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name)
+		}
+
+		// If it is an instance check for a corresponding template
+		// unit in the Registry or disk. 
+ // If we found a template unit, later we create a + // near-identical instance unit in the Registry - same + // unit file as the template, but different name + uf, err = getUnitFileFromTemplate(info, file) + if err != nil { + return nil, fmt.Errorf("failed getting Unit(%s) from template: %v", file, err) + } + } + + log.Debugf("Found Unit(%s)", name) + return uf, nil +} + // getUnitFromFile attempts to load a Unit from a given filename // It returns the Unit or nil, and any error encountered func getUnitFromFile(file string) (*unit.UnitFile, error) { @@ -496,6 +542,39 @@ func getUnitFromFile(file string) (*unit.UnitFile, error) { return unit.NewUnitFile(string(out)) } +// getUnitFileFromTemplate attempts to get a Unit from a template unit that +// is either in the registry or on the file system +// It takes two arguments, the template information and the unit file name +// It returns the Unit or nil; and any error encountered +func getUnitFileFromTemplate(uni *unit.UnitNameInfo, fileName string) (*unit.UnitFile, error) { + var uf *unit.UnitFile + + tmpl, err := cAPI.Unit(uni.Template) + if err != nil { + return nil, fmt.Errorf("unable to retrieve Unit(%s) from Registry: %v", uni.Template, err) + } + + if tmpl != nil { + warnOnDifferentLocalUnit(fileName, tmpl) + uf = schema.MapSchemaUnitOptionsToUnitFile(tmpl.Options) + log.Debugf("Template Unit(%s) found in registry", uni.Template) + } else { + // Finally, if we could not find a template unit in the Registry, + // check the local disk for one instead + filePath := path.Join(path.Dir(fileName), uni.Template) + if _, err := os.Stat(filePath); os.IsNotExist(err) { + return nil, fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", uni.Template) + } + + uf, err = getUnitFromFile(filePath) + if err != nil { + return nil, fmt.Errorf("unable to load Unit(%s) from file: %v", filePath, err) + } + } + + return uf, nil +} + func getTunnelFlag() string { tun := globalFlags.Tunnel if tun != "" && 
!strings.Contains(tun, ":") { @@ -598,8 +677,6 @@ func lazyCreateUnits(args []string) error { errchan := make(chan error) var wg sync.WaitGroup for _, arg := range args { - // TODO(jonboulle): this loop is getting too unwieldy; factor it out - arg = maybeAppendDefaultUnitType(arg) name := unitNameMangle(arg) @@ -614,45 +691,12 @@ func lazyCreateUnits(args []string) error { continue } - var uf *unit.UnitFile - // Failing that, assume the name references a local unit file on disk, and attempt to load that, if it exists - // TODO(mischief): consolidate these two near-identical codepaths - if _, err := os.Stat(arg); !os.IsNotExist(err) { - uf, err = getUnitFromFile(arg) - if err != nil { - return fmt.Errorf("failed getting Unit(%s) from file: %v", arg, err) - } - } else { - // Otherwise (if the unit file does not exist), check if the name appears to be an instance unit, - // and if so, check for a corresponding template unit in the Registry - uni := unit.NewUnitNameInfo(name) - if uni == nil { - return fmt.Errorf("error extracting information from unit name %s", name) - } else if !uni.IsInstance() { - return fmt.Errorf("unable to find Unit(%s) in Registry or on filesystem", name) - } - tmpl, err := cAPI.Unit(uni.Template) - if err != nil { - return fmt.Errorf("error retrieving template Unit(%s) from Registry: %v", uni.Template, err) - } - - // Finally, if we could not find a template unit in the Registry, check the local disk for one instead - if tmpl == nil { - file := path.Join(path.Dir(arg), uni.Template) - if _, err := os.Stat(file); os.IsNotExist(err) { - return fmt.Errorf("unable to find Unit(%s) or template Unit(%s) in Registry or on filesystem", name, uni.Template) - } - uf, err = getUnitFromFile(file) - if err != nil { - return fmt.Errorf("failed getting template Unit(%s) from file: %v", uni.Template, err) - } - } else { - warnOnDifferentLocalUnit(arg, tmpl) - uf = schema.MapSchemaUnitOptionsToUnitFile(tmpl.Options) - } - - // If we found a template unit, 
create a near-identical instance unit in - // the Registry - same unit file as the template, but different name + // Assume that the name references a local unit file on + // disk or if it is an instance unit and if so get its + // corresponding unit + uf, err := getUnitFile(arg) + if err != nil { + return err } _, err = createUnit(name, uf) @@ -745,6 +789,54 @@ func setTargetStateOfUnits(units []string, state job.JobState) ([]*schema.Unit, return triggered, nil } +// getBlockAttempts gets the correct value of how many attempts to try +// before giving up on an operation. +// It returns a negative value which means do not block, if zero is +// returned then it means try forever, and if a positive value is +// returned then try up to that value +func getBlockAttempts() int { + // By default we wait forever + var attempts int = 0 + + if sharedFlags.BlockAttempts > 0 { + attempts = sharedFlags.BlockAttempts + } + + if sharedFlags.NoBlock { + attempts = -1 + } + + return attempts +} + +// tryWaitForUnitStates tries to wait for units to reach the desired state. +// It takes 5 arguments, the units to wait for, the desired state, the +// desired JobState, how many attempts before timing out and a writer +// interface. +// tryWaitForUnitStates polls each of the indicated units until they +// reach the desired state. If maxAttempts is negative, then it will not +// wait, it will assume that all units reached their desired state. +// If maxAttempts is zero tryWaitForUnitStates will retry forever, and +// if it is greater than zero, it will retry up to the indicated value. +// It returns 0 on success or 1 on errors. 
+func tryWaitForUnitStates(units []string, state string, js job.JobState, maxAttempts int, out io.Writer) (ret int) { + // We do not wait just assume we reached the desired state + if maxAttempts <= -1 { + for _, name := range units { + stdout("Triggered unit %s %s", name, state) + } + return + } + + errchan := waitForUnitStates(units, js, maxAttempts, out) + for err := range errchan { + stderr("Error waiting for units: %v", err) + ret = 1 + } + + return +} + // waitForUnitStates polls each of the indicated units until each of their // states is equal to that which the caller indicates, or until the // polling operation times out. waitForUnitStates will retry forever, or @@ -771,7 +863,7 @@ func waitForUnitStates(units []string, js job.JobState, maxAttempts int, out io. func checkUnitState(name string, js job.JobState, maxAttempts int, out io.Writer, wg *sync.WaitGroup, errchan chan error) { defer wg.Done() - sleep := 500 * time.Millisecond + sleep := defaultSleepTime if maxAttempts < 1 { for { diff --git a/fleetctl/fleetctl_test.go b/fleetctl/fleetctl_test.go index ca635dab6..643292809 100644 --- a/fleetctl/fleetctl_test.go +++ b/fleetctl/fleetctl_test.go @@ -15,9 +15,11 @@ package main import ( + "fmt" "testing" "github.com/coreos/fleet/client" + "github.com/coreos/fleet/job" "github.com/coreos/fleet/machine" "github.com/coreos/fleet/registry" "github.com/coreos/fleet/unit" @@ -26,6 +28,68 @@ import ( "github.com/coreos/fleet/Godeps/_workspace/src/github.com/coreos/go-semver/semver" ) +type commandTestResults struct { + description string + units []string + expectedExit int +} + +func newFakeRegistryForCommands(unitPrefix string, unitCount int) client.API { + // clear machineStates for every invocation + machineStates = nil + machines := []machine.MachineState{ + newMachineState("c31e44e1-f858-436e-933e-59c642517860", "1.2.3.4", map[string]string{"ping": "pong"}), + newMachineState("595989bb-cbb7-49ce-8726-722d6e157b4e", "5.6.7.8", map[string]string{"foo": 
"bar"}), + } + + jobs := make([]job.Job, 0) + appendJobsForTests(&jobs, machines[0], unitPrefix, unitCount) + appendJobsForTests(&jobs, machines[1], unitPrefix, unitCount) + + states := make([]unit.UnitState, 0) + for i := 1; i <= unitCount; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", unitPrefix, i), + LoadState: "loaded", + ActiveState: "active", + SubState: "listening", + MachineID: machines[0].ID, + } + states = append(states, state) + } + + for i := 1; i <= unitCount; i++ { + state := unit.UnitState{ + UnitName: fmt.Sprintf("%s%d.service", unitPrefix, i), + LoadState: "loaded", + ActiveState: "inactive", + SubState: "dead", + MachineID: machines[1].ID, + } + states = append(states, state) + } + + reg := registry.NewFakeRegistry() + reg.SetMachines(machines) + reg.SetUnitStates(states) + reg.SetJobs(jobs) + + return &client.RegistryClient{Registry: reg} +} + +func appendJobsForTests(jobs *[]job.Job, machine machine.MachineState, prefix string, unitCount int) { + for i := 1; i <= unitCount; i++ { + j := job.Job{ + Name: fmt.Sprintf("%s%d.service", prefix, i), + Unit: unit.UnitFile{}, + TargetMachineID: machine.ID, + } + *jobs = append(*jobs, j) + } + + return +} + func newFakeRegistryForCheckVersion(v string) registry.ClusterRegistry { sv, err := semver.NewVersion(v) if err != nil { @@ -117,6 +181,37 @@ func TestUnitNameMangle(t *testing.T) { } } +func TestGetBlockAttempts(t *testing.T) { + oldNoBlock := sharedFlags.NoBlock + oldBlockAttempts := sharedFlags.BlockAttempts + + defer func() { + sharedFlags.NoBlock = oldNoBlock + sharedFlags.BlockAttempts = oldBlockAttempts + }() + + var blocktests = []struct { + noBlock bool + blockAttempts int + expected int + }{ + {true, 0, -1}, + {true, -1, -1}, + {true, 9999, -1}, + {false, 0, 0}, + {false, -1, 0}, + {false, 9999, 9999}, + } + + for _, tt := range blocktests { + sharedFlags.NoBlock = tt.noBlock + sharedFlags.BlockAttempts = tt.blockAttempts + if n := getBlockAttempts(); n != 
tt.expected { + t.Errorf("got %d, want %d", n, tt.expected) + } + } +} + func newUnitFile(t *testing.T, contents string) *unit.UnitFile { uf, err := unit.NewUnitFile(contents) if err != nil { diff --git a/fleetctl/load.go b/fleetctl/load.go index 497f86058..0f7b0923c 100644 --- a/fleetctl/load.go +++ b/fleetctl/load.go @@ -66,17 +66,7 @@ func runLoadUnits(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(loading, job.JobStateLoaded, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range loading { - stdout("Triggered unit %s load", name) - } - } + exit = tryWaitForUnitStates(loading, "load", job.JobStateLoaded, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/start.go b/fleetctl/start.go index 77ea0d343..2ded29539 100644 --- a/fleetctl/start.go +++ b/fleetctl/start.go @@ -74,17 +74,7 @@ func runStartUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(starting, job.JobStateLaunched, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range starting { - stdout("Triggered unit %s start", name) - } - } + exit = tryWaitForUnitStates(starting, "start", job.JobStateLaunched, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/stop.go b/fleetctl/stop.go index 262946bfb..941b33d7b 100644 --- a/fleetctl/stop.go +++ b/fleetctl/stop.go @@ -79,17 +79,7 @@ func runStopUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(stopping, job.JobStateLoaded, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range stopping { - stdout("Triggered unit %s stop", name) - } - } + exit = tryWaitForUnitStates(stopping, "stop", 
job.JobStateLoaded, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/stop_test.go b/fleetctl/stop_test.go new file mode 100644 index 000000000..cc6448c15 --- /dev/null +++ b/fleetctl/stop_test.go @@ -0,0 +1,96 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "sync" + "testing" + + "github.com/coreos/fleet/job" +) + +func doStopUnits(r commandTestResults, errchan chan error) { + exit := runStopUnit(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) + } + + real_units, err := findUnits(r.units) + if err != nil { + errchan <- err + return + } + + // We assume that we reached the desired state + for _, v := range real_units { + if job.JobState(v.DesiredState) != job.JobStateLoaded { + errchan <- fmt.Errorf("Error: unit %s was not stopped as requested", v.Name) + } + } +} + +func TestRunStopUnits(t *testing.T) { + unitPrefix := "stop" + oldNoBlock := sharedFlags.NoBlock + defer func() { + sharedFlags.NoBlock = oldNoBlock + }() + + results := []commandTestResults{ + { + "stop available units", + []string{"stop1", "stop2", "stop3", "stop4", "stop5"}, + 0, + }, + { + "stop non-available units", + []string{"y1", "y2"}, + 0, + }, + { + "stop available and non-available units", + []string{"y1", "y2", "y3", "y4", "stop1", "stop2", "stop3", "stop4", "stop5", "y0"}, + 0, + }, + } + + 
sharedFlags.NoBlock = true + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) + + wg.Add(2) + go func() { + defer wg.Done() + doStopUnits(r, errchan) + }() + go func() { + defer wg.Done() + doStopUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) + } + } +} diff --git a/fleetctl/unload.go b/fleetctl/unload.go index 6758f2825..48eb833fc 100644 --- a/fleetctl/unload.go +++ b/fleetctl/unload.go @@ -60,17 +60,7 @@ func runUnloadUnit(args []string) (exit int) { } } - if !sharedFlags.NoBlock { - errchan := waitForUnitStates(wait, job.JobStateInactive, sharedFlags.BlockAttempts, os.Stdout) - for err := range errchan { - stderr("Error waiting for units: %v", err) - exit = 1 - } - } else { - for _, name := range wait { - stdout("Triggered unit %s unload", name) - } - } + exit = tryWaitForUnitStates(wait, "unload", job.JobStateInactive, getBlockAttempts(), os.Stdout) return } diff --git a/fleetctl/unload_test.go b/fleetctl/unload_test.go new file mode 100644 index 000000000..84ff41676 --- /dev/null +++ b/fleetctl/unload_test.go @@ -0,0 +1,96 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + "sync" + "testing" + + "github.com/coreos/fleet/job" +) + +func doUnloadUnits(r commandTestResults, errchan chan error) { + exit := runUnloadUnit(r.units) + if exit != r.expectedExit { + errchan <- fmt.Errorf("%s: expected exit code %d but received %d", r.description, r.expectedExit, exit) + } + + real_units, err := findUnits(r.units) + if err != nil { + errchan <- err + return + } + + // We assume that we reached the desired state + for _, v := range real_units { + if job.JobState(v.DesiredState) != job.JobStateInactive { + errchan <- fmt.Errorf("Error: unit %s was not unloaded as requested", v.Name) + } + } +} + +func TestRunUnloadUnits(t *testing.T) { + unitPrefix := "unload" + oldNoBlock := sharedFlags.NoBlock + defer func() { + sharedFlags.NoBlock = oldNoBlock + }() + + results := []commandTestResults{ + { + "unload available units", + []string{"unload1", "unload2", "unload3", "unload4", "unload5"}, + 0, + }, + { + "unload non-available units", + []string{"y1", "y2"}, + 0, + }, + { + "unload available and non-available units", + []string{"y1", "y2", "y3", "y4", "unload1", "unload2", "unload3", "unload4", "unload5", "y0"}, + 0, + }, + } + + sharedFlags.NoBlock = true + for _, r := range results { + var wg sync.WaitGroup + errchan := make(chan error) + + cAPI = newFakeRegistryForCommands(unitPrefix, len(r.units)) + + wg.Add(2) + go func() { + defer wg.Done() + doUnloadUnits(r, errchan) + }() + go func() { + defer wg.Done() + doUnloadUnits(r, errchan) + }() + + go func() { + wg.Wait() + close(errchan) + }() + + for err := range errchan { + t.Errorf("%v", err) + } + } +} diff --git a/functional/.gitignore b/functional/.gitignore new file mode 100644 index 000000000..a977916f6 --- /dev/null +++ b/functional/.gitignore @@ -0,0 +1 @@ +.vagrant/ diff --git a/functional/README.md b/functional/README.md index 815400d0e..b36aeb72f 100644 --- a/functional/README.md +++ b/functional/README.md @@ -2,52 +2,101 @@ This functional test 
suite deploys a fleet cluster using nspawn containers, and asserts fleet is functioning properly. -It shares an instance of etcd deployed on the host machine with each of the nspawn containers. +It shares an instance of etcd deployed on the host machine with each of the nspawn containers which use `172.18.0.1/16` network, so please make sure this network does not intersect with others. -It's recommended to run this in a virtual machine environment on CoreOS (e.g. using coreos-vagrant). The only dependency for the tests not provided on the CoreOS image is `go`. +It's recommended to run this in a virtual machine environment on CoreOS (e.g. using [Vagrant][test-in-vagrant]). -The caller must do three things before running the tests: +Since the tests utilize [`systemd-nspawn`][systemd-nspawn], this needs to be invoked as sudo/root. -1. Ensure an ssh-agent is running and the functional-testing identity is loaded. The `SSH_AUTH_SOCK` environment variable must be set. +If the tests are aborted partway through, it's currently possible for them to leave residual state as a result of the `systemd-nspawn` operations. This can be cleaned up using the `clean.sh` script. -``` -$ ssh-agent -$ ssh-add fleet/functional/fixtures/id_rsa -$ echo $SSH_AUTH_SOCK -/tmp/ssh-kwmtTOsL7978/agent.7978 -``` -2. Ensure the `FLEETD_BIN` and `FLEETCTL_BIN` environment variables point to the respective fleetd and fleetctl binaries that should be used to drive the actual tests. +### Run tests in Vagrant + +The recommended way to run the tests is to use the provided Vagrantfile, which will set up a single CoreOS instance with a one-member etcd cluster (configuration is applied using `user-data` [Cloud-Config][cloud-config] file located in this directory). 
+To do so, simply run the following commands on a system with Vagrant installed (see [Vagrant configuration][configure-vagrant] section of this doc) +```sh +$ git clone https://github.com/coreos/fleet +$ cd fleet/functional +$ ./run-in-vagrant ``` -$ export FLEETD_BIN=/path/to/fleetd -$ export FLEETCTL_BIN=/path/to/fleetctl + +Vagrant's provision step includes go binaries download using `functional/provision/install_go.sh` script. + +### Run tests inside other CoreOS platforms (QEMU/BareMetal/libvirt/etc) + +It's also possible to run the tests on CoreOS on other platforms. The following commands should be run *inside* the CoreOS instance. + +```sh +$ git clone https://github.com/coreos/fleet ``` -3. Make sure etcd is running on the host system. +If you didn't configure etcd2 daemon yet, just run this script: +```sh +$ sudo fleet/functional/start_etcd ``` -$ systemctl start etcd + +It will configure and start a one-member etcd cluster. + +Then run the functional tests (script will download and unpack golang into home directory): + +```sh +$ sudo fleet/functional/test ``` -Then the tests can be run with: +When `fleet/functional/test` can not find go binaries, it will download them automatically using `functional/provision/install_go.sh` script. 
+## Configure host environment to run Vagrant + +### Debian/Ubuntu + +#### Install Vagrant + +```sh +sudo apt-get install -y git nfs-kernel-server +wget https://releases.hashicorp.com/vagrant/1.8.1/vagrant_1.8.1_x86_64.deb +sudo dpkg -i vagrant_1.8.1_x86_64.deb ``` -# go test github.com/coreos/fleet/functional + +#### Install VirtualBox + +```sh +echo "deb http://download.virtualbox.org/virtualbox/debian $(lsb_release -sc) contrib" | sudo tee /etc/apt/sources.list.d/virtualbox.list +wget -q https://www.virtualbox.org/download/oracle_vbox.asc -O- | sudo apt-key add - +sudo apt-get update +sudo apt-get install -y build-essential dkms +sudo apt-get install -y VirtualBox-5.0 +#Previous VirtualBox (if you have problems with nested virtualization, more info here: https://www.virtualbox.org/ticket/14965) +#sudo apt-get install -y VirtualBox-4.3 ``` -Since the tests utilize `systemd-nspawn`, this needs to be invoked as sudo/root. +### CentOS/Fedora -An example test session using coreos-vagrant follows. This assumes that go is available in `/home/core/go` and the fleet repository in `/home/core/fleet` on the target machine (the easiest way to achieve this is to use shared folders). 
+**NOTE**: NFS and Vagrant doesn't work out of the box on CentOS 6.x, so it is recommended to use CentOS 7.x + +#### Install Vagrant + +```sh +sudo yum install -y git nfs-utils +sudo service nfs start +sudo yum install -y https://releases.hashicorp.com/vagrant/1.8.1/vagrant_1.8.1_x86_64.rpm ``` -vagrant ssh core-01 -- -A -export GOROOT="$(pwd)/go" -export PATH="${GOROOT}/bin:$PATH" -cd fleet -ssh-add functional/fixtures/id_rsa -export GOPATH="$(pwd)/gopath" -export FLEETD_BIN="$(pwd)/bin/fleetd" -export FLEETCTL_BIN="$(pwd)/bin/fleetctl" -sudo -E env PATH=$PATH go test github.com/coreos/fleet/functional -v + +#### Install VirtualBox + +```sh +source /etc/os-release +for id in $ID_LIKE $ID; do break; done +OS_ID=${id:-rhel} +curl http://download.virtualbox.org/virtualbox/rpm/$OS_ID/virtualbox.repo | sudo tee /etc/yum.repos.d/virtualbox.repo +sudo yum install -y make automake gcc gcc-c++ kernel-devel-`uname -r` dkms +sudo yum install -y VirtualBox-5.0 +#Previous VirtualBox (if you have problems with nested virtualization, more info here: https://www.virtualbox.org/ticket/14965) +#sudo yum install -y VirtualBox-4.3 ``` -If the tests are aborted partway through, it's currently possible for them to leave residual state as a result of the systemd-nspawn operations. This can be cleaned up using the `clean.sh` script. 
+[test-in-vagrant]: #run-tests-in-vagrant +[configure-vagrant]: #configure-host-environment-to-run-vagrant +[systemd-nspawn]: https://www.freedesktop.org/software/systemd/man/systemd-nspawn.html +[cloud-config]: https://github.com/coreos/coreos-cloudinit/blob/master/Documentation/cloud-config.md diff --git a/functional/Vagrantfile b/functional/Vagrantfile new file mode 100644 index 000000000..b2b71a66f --- /dev/null +++ b/functional/Vagrantfile @@ -0,0 +1,146 @@ +# -*- mode: ruby -*- +# # vi: set ft=ruby : +# Vagrantfile based on official CoreOS Vagrantfile https://github.com/coreos/coreos-vagrant with one extra provision string + +require 'fileutils' + +Vagrant.require_version ">= 1.6.0" + +CLOUD_CONFIG_PATH = File.join(File.dirname(__FILE__), "user-data") +CONFIG = File.join(File.dirname(__FILE__), "config.rb") + +# Defaults for config options defined in CONFIG +$num_instances = 1 +$instance_name_prefix = "core" +$update_channel = "alpha" +$image_version = "current" +$enable_serial_logging = false +$share_home = false +$vm_gui = false +$vm_memory = 1024 +$vm_cpus = 1 +$shared_folders = {} +$forwarded_ports = {} + +# Attempt to apply the deprecated environment variable NUM_INSTANCES to +# $num_instances while allowing config.rb to override it +if ENV["NUM_INSTANCES"].to_i > 0 && ENV["NUM_INSTANCES"] + $num_instances = ENV["NUM_INSTANCES"].to_i +end + +if File.exist?(CONFIG) + require CONFIG +end + +# Use old vb_xxx config variables when set +def vm_gui + $vb_gui.nil? ? $vm_gui : $vb_gui +end + +def vm_memory + $vb_memory.nil? ? $vm_memory : $vb_memory +end + +def vm_cpus + $vb_cpus.nil? ? 
$vm_cpus : $vb_cpus +end + +Vagrant.configure("2") do |config| + # always use Vagrants insecure key + config.ssh.insert_key = false + + config.vm.box = "coreos-%s" % $update_channel + if $image_version != "current" + config.vm.box_version = $image_version + end + config.vm.box_url = "https://storage.googleapis.com/%s.release.core-os.net/amd64-usr/%s/coreos_production_vagrant.json" % [$update_channel, $image_version] + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v, override| + override.vm.box_url = "https://storage.googleapis.com/%s.release.core-os.net/amd64-usr/%s/coreos_production_vagrant_vmware_fusion.json" % [$update_channel, $image_version] + end + end + + config.vm.provider :virtualbox do |v| + # On VirtualBox, we don't have guest additions or a functional vboxsf + # in CoreOS, so tell Vagrant that so it can be smarter. + v.check_guest_additions = false + v.functional_vboxsf = false + end + + # plugin conflict + if Vagrant.has_plugin?("vagrant-vbguest") then + config.vbguest.auto_update = false + end + + (1..$num_instances).each do |i| + config.vm.define vm_name = "%s-%02d" % [$instance_name_prefix, i] do |config| + config.vm.hostname = vm_name + + if $enable_serial_logging + logdir = File.join(File.dirname(__FILE__), "log") + FileUtils.mkdir_p(logdir) + + serialFile = File.join(logdir, "%s-serial.txt" % vm_name) + FileUtils.touch(serialFile) + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v, override| + v.vmx["serial0.present"] = "TRUE" + v.vmx["serial0.fileType"] = "file" + v.vmx["serial0.fileName"] = serialFile + v.vmx["serial0.tryNoRxLoss"] = "FALSE" + end + end + + config.vm.provider :virtualbox do |vb, override| + vb.customize ["modifyvm", :id, "--uart1", "0x3F8", "4"] + vb.customize ["modifyvm", :id, "--uartmode1", serialFile] + end + end + + if $expose_docker_tcp + config.vm.network "forwarded_port", guest: 2375, host: ($expose_docker_tcp + i - 1), auto_correct: 
true + end + + $forwarded_ports.each do |guest, host| + config.vm.network "forwarded_port", guest: guest, host: host, auto_correct: true + end + + ["vmware_fusion", "vmware_workstation"].each do |vmware| + config.vm.provider vmware do |v| + v.gui = vm_gui + v.vmx['memsize'] = vm_memory + v.vmx['numvcpus'] = vm_cpus + end + end + + config.vm.provider :virtualbox do |vb| + vb.gui = vm_gui + vb.memory = vm_memory + vb.cpus = vm_cpus + end + + ip = "172.17.8.#{i+100}" + config.vm.network :private_network, ip: ip + + # Uncomment below to enable NFS for sharing the host machine into the coreos-vagrant VM. + #config.vm.synced_folder ".", "/home/core/share", id: "core", :nfs => true, :mount_options => ['nolock,vers=3,udp'] + $shared_folders.each_with_index do |(host_folder, guest_folder), index| + config.vm.synced_folder host_folder.to_s, guest_folder.to_s, id: "core-share%02d" % index, nfs: true, mount_options: ['nolock,vers=3,udp'] + end + + if $share_home + config.vm.synced_folder ENV['HOME'], ENV['HOME'], id: "home", :nfs => true, :mount_options => ['nolock,vers=3,udp'] + end + + if File.exist?(CLOUD_CONFIG_PATH) + config.vm.provision :file, :source => "#{CLOUD_CONFIG_PATH}", :destination => "/tmp/vagrantfile-user-data" + config.vm.provision :shell, :inline => "mv /tmp/vagrantfile-user-data /var/lib/coreos-vagrant/", :privileged => true + end + + config.vm.provision :shell, :path => "provision/install_go.sh", :privileged => false + + end + end +end diff --git a/functional/clean.sh b/functional/clean.sh index 0b3f25e0d..f86b75524 100755 --- a/functional/clean.sh +++ b/functional/clean.sh @@ -9,3 +9,5 @@ sudo rm -fr /run/systemd/system/*smoke* /tmp/smoke sudo systemctl daemon-reload ip link show fleet0 &>/dev/null && sudo ip link del fleet0 etcdctl rm --recursive /fleet_functional + +rm -f log diff --git a/functional/config.rb b/functional/config.rb new file mode 100644 index 000000000..27ae745dd --- /dev/null +++ b/functional/config.rb @@ -0,0 +1,58 @@ +# Size of the 
CoreOS cluster created by Vagrant +$num_instances=1 + +# coreos-vagrant is configured through a series of configuration +# options (global ruby variables) which are detailed below. To modify +# these options, first copy this file to "config.rb". Then simply +# uncomment the necessary lines, leaving the $, and replace everything +# after the equals sign.. + +# Change basename of the VM +# The default value is "core", which results in VMs named starting with +# "core-01" through to "core-${num_instances}". +#$instance_name_prefix="core" + +# Change the version of CoreOS to be installed +# To deploy a specific version, simply set $image_version accordingly. +# For example, to deploy version 709.0.0, set $image_version="709.0.0". +# The default value is "current", which points to the current version +# of the selected channel +#$image_version = "current" + +# Official CoreOS channel from which updates should be downloaded +$update_channel='stable' + +# Log the serial consoles of CoreOS VMs to log/ +# Enable by setting value to true, disable with false +# WARNING: Serial logging is known to result in extremely high CPU usage with +# VirtualBox, so should only be used in debugging situations +#$enable_serial_logging=false + +# Enable port forwarding of Docker TCP socket +# Set to the TCP port you want exposed on the *host* machine, default is 2375 +# If 2375 is used, Vagrant will auto-increment (e.g. in the case of $num_instances > 1) +# You can then use the docker tool locally by setting the following env var: +# export DOCKER_HOST='tcp://127.0.0.1:2375' +#$expose_docker_tcp=2375 + +# Enable NFS sharing of your home directory ($HOME) to CoreOS +# It will be mounted at the same path in the VM as on the host. 
+# Example: /Users/foobar -> /Users/foobar +#$share_home=false + +# Customize VMs +#$vm_gui = false +$vm_memory = 512 +$vm_cpus = 1 + +# Share additional folders to the CoreOS VMs +# For example, +# $shared_folders = {'/path/on/host' => '/path/on/guest', '/home/foo/app' => '/app'} +# or, to map host folders to guest folders of the same name, +# $shared_folders = Hash[*['/home/foo/app1', '/home/foo/app2'].map{|d| [d, d]}.flatten] +#$shared_folders = {} + +$shared_folders = {'../' => '/home/core/fleet'} + +# Enable port forwarding from guest(s) to host machine, syntax is: { 80 => 8080 }, auto correction is enabled by default. +#$forwarded_ports = {} diff --git a/functional/platform/nspawn.go b/functional/platform/nspawn.go index d642fd2d6..0b461d416 100644 --- a/functional/platform/nspawn.go +++ b/functional/platform/nspawn.go @@ -211,10 +211,10 @@ func (nc *nspawnCluster) prepCluster() (err error) { return } - if !strings.Contains(stdout, "172.17.0.1/16") { - _, _, err = run("ip addr add 172.17.0.1/16 dev fleet0") + if !strings.Contains(stdout, "172.18.0.1/16") { + _, _, err = run("ip addr add 172.18.0.1/16 dev fleet0") if err != nil { - log.Printf("Failed adding 172.17.0.1/16 to fleet0: %v", err) + log.Printf("Failed adding 172.18.0.1/16 to fleet0: %v", err) return } } @@ -251,7 +251,7 @@ func (nc *nspawnCluster) buildConfigDrive(dir, ip string) error { } defer userFile.Close() - etcd := "http://172.17.0.1:4001" + etcd := "http://172.18.0.1:4001" return util.BuildCloudConfig(userFile, ip, etcd, nc.keyspace()) } @@ -290,7 +290,7 @@ func (nc *nspawnCluster) createMember(id string) (m Member, err error) { nm := nspawnMember{ uuid: newMachineID(), id: id, - ip: fmt.Sprintf("172.17.1.%s", id), + ip: fmt.Sprintf("172.18.1.%s", id), } nc.members[nm.ID()] = nm @@ -303,13 +303,15 @@ func (nc *nspawnCluster) createMember(id string) (m Member, err error) { // minimum requirements for running systemd/coreos in a container fmt.Sprintf("mkdir -p %s/usr", fsdir), fmt.Sprintf("cp 
/etc/os-release %s/etc", fsdir), + fmt.Sprintf("echo 'core:x:500:500:CoreOS Admin:/home/core:/bin/bash' > %s/etc/passwd", fsdir), + fmt.Sprintf("echo 'core:x:500:' > %s/etc/group", fsdir), fmt.Sprintf("ln -s /proc/self/mounts %s/etc/mtab", fsdir), fmt.Sprintf("ln -s usr/lib64 %s/lib64", fsdir), fmt.Sprintf("ln -s lib64 %s/lib", fsdir), fmt.Sprintf("ln -s usr/bin %s/bin", fsdir), fmt.Sprintf("ln -s usr/sbin %s/sbin", fsdir), fmt.Sprintf("mkdir -p %s/home/core/.ssh", fsdir), - fmt.Sprintf("chown -R core:core %s/home/core", fsdir), + fmt.Sprintf("chown -R 500:500 %s/home/core", fsdir), // We don't need this, and it's slow, so mask it fmt.Sprintf("ln -s /dev/null %s/etc/systemd/system/systemd-udev-hwdb-update.service", fsdir), @@ -346,7 +348,7 @@ UseDNS no [Service] Type=oneshot RemainAfterExit=yes - ExecStart=/usr/bin/ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" -b 768` + ExecStart=/usr/bin/ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" -b 1024` if err = ioutil.WriteFile(path.Join(fsdir, "/etc/systemd/system/sshd-keygen.service"), []byte(sshd_keygen), 0644); err != nil { log.Printf("Failed writing sshd-keygen.service: %v", err) return @@ -395,6 +397,7 @@ UseDNS no return default: } + log.Printf("Dialing machine: %s", addr) c, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) if err == nil { c.Close() diff --git a/functional/provision/install_go.sh b/functional/provision/install_go.sh new file mode 100755 index 000000000..0e58cb52f --- /dev/null +++ b/functional/provision/install_go.sh @@ -0,0 +1,16 @@ +#!/bin/bash -e + +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +export GOROOT=${HOME}/go +export PATH=${HOME}/go/bin:${PATH} + +gover=1.5.3 +gotar=go${gover}.linux-amd64.tar.gz +if [ ! 
-f ${HOME}/${gotar} ]; then + # Remove unfinished archive when you press Ctrl+C + trap "rm -f ${HOME}/${gotar}" INT TERM + wget --no-verbose https://storage.googleapis.com/golang/${gotar} -P ${HOME} +fi +tar -xf ${HOME}/${gotar} -C ${HOME} diff --git a/functional/run-in-vagrant b/functional/run-in-vagrant new file mode 100755 index 000000000..d528c6d27 --- /dev/null +++ b/functional/run-in-vagrant @@ -0,0 +1,7 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +cd $CDIR + +vagrant up +vagrant ssh core-01 -c "sudo ~/fleet/functional/test" diff --git a/functional/start_etcd b/functional/start_etcd new file mode 100755 index 000000000..84b853623 --- /dev/null +++ b/functional/start_etcd @@ -0,0 +1,25 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +if [[ -z "${SUDO_UID}" && "${USER_ID}" != "0" ]]; then + echo "Script should be run using sudo" + exit 1 +fi + +if [ ! -f ${HOME}/setup-network-environment ]; then + # Remove unfinished file when you press Ctrl+C + trap "rm -f ${HOME}/setup-network-environment" INT TERM + wget --no-verbose https://github.com/kelseyhightower/setup-network-environment/releases/download/1.0.1/setup-network-environment -P ${HOME} +fi + +if [ ! -x ${HOME}/setup-network-environment ]; then + chmod +x ${HOME}/setup-network-environment +fi + +${HOME}/setup-network-environment +source /etc/network-environment +export COREOS_PRIVATE_IPV4=$DEFAULT_IPV4 +coreos-cloudinit --from-file=${CDIR}/user-data diff --git a/functional/test b/functional/test new file mode 100755 index 000000000..0ccb464e0 --- /dev/null +++ b/functional/test @@ -0,0 +1,49 @@ +#!/bin/bash -e + +CDIR=$(cd `dirname $0` && pwd) +USER_ID=${SUDO_UID:-$(id -u)} +HOME=$(getent passwd "${USER_ID}" | cut -d: -f6) + +cd ${CDIR}/../ +export VERSION=$(git describe --dirty) +export GOROOT=${HOME}/go +export PATH=${HOME}/go/bin:${PATH} + +if [ ! 
-S "$SSH_AUTH_SOCK" ]; then + eval $(ssh-agent) +fi + +# github doesn't support explicit file permission set, this is workaround +chmod 0600 functional/fixtures/id_rsa +ssh-add functional/fixtures/id_rsa +sudo systemctl stop fleet || true + +if [[ ! $(go version 2>/dev/null) ]]; then + functional/provision/install_go.sh +fi + +if [ ! -x "bin/fleetd" ] || \ + [ ! -x "bin/fleetctl" ] || \ + [ ! $(bin/fleetctl | grep "$VERSION") ]; then + ./build +fi + +source build-env +eval $(go env) +go test github.com/coreos/fleet/functional -ldflags "${GLDFLAGS}" -v 2>&1 | tee functional/log + +total=$(grep -E '^--- (PASS|FAIL)' functional/log | wc -l) +pass=$(grep '^--- PASS' functional/log | wc -l) +fail=$(grep '^--- FAIL' functional/log | wc -l) + +echo "" +grep -E '^--- (PASS|FAIL)' functional/log + +echo "===========================================================" +echo "Functional test summary" +echo "===========================================================" +echo "# TOTAL: $total" +echo "# PASS: $pass" +echo "# FAIL: $fail" +echo "" +echo "See functional/log for the detailed output." diff --git a/functional/user-data b/functional/user-data new file mode 100644 index 000000000..9aba6418e --- /dev/null +++ b/functional/user-data @@ -0,0 +1,12 @@ +#cloud-config + +--- +coreos: + etcd2: + advertise-client-urls: http://$private_ipv4:2379 + listen-client-urls: http://0.0.0.0:2379,http://0.0.0.0:4001 + units: + - name: etcd2.service + command: start + update: + reboot-strategy: off