Skip to content

Commit

Permalink
Graceful sidecar support
Browse files Browse the repository at this point in the history
TL;DR Tekton Pipelines now gracefully start and stop with sidecars
running alongside TaskRun pods.

Long Version :-

TaskRun pods can have sidecars injected by Admission Controllers. This
is common practice with Istio where a proxy sidecar is injected into pods
so that they can be included as part of a service mesh. Unfortunately
there is no built-in Kubernetes mechanism to set the lifetime of a
sidecar container to match that of the pod's "primary" container. In
practice this means that pods injected with sidecars will not stop until
both the primary and all sidecar containers are stopped.

Prior to this commit injected sidecars would cause a TaskRun pod to run
forever, even when all Step containers of a Task had completed. This
commit introduces mechanisms to start and stop sidecars gracefully
meaning that a) Step containers wait until all sidecars are in a ready
state before starting and b) Sidecar containers are stopped when a
TaskRun's Step containers are done.

No end-to-end tests have been added as part of this PR because doing so
would require Istio to be enabled on the CI cluster (or a dummy
Admission Controller / Mutating Webhook thing to be written just for
injecting containers into test Pods). The Istio requirement would put an
undue burden on those users running tests in non-GKE environments. It's
currently planned for a follow-up PR to introduce a) user-defined
sidecars and b) a Tekton-injected sidecar that performs logging. Once that
work is complete it will be much simpler to e2e test this sidecar code.

Further to the above, a number of smaller refactors have taken place in
this PR:

- a sidecars package has been created to encapsulate the logic for
stopping sidecars with the nop container image
- the updateStatusFromPod func has been moved into a taskrun/status
package and broken down into constituent helpers
- a small amount of dead code has been eliminated

Co-authored-by: Jose Blas Camacho Taboada <[email protected]>
Co-authored-by: Scott Seaward <[email protected]>
  • Loading branch information
3 people authored and tekton-robot committed Jun 17, 2019
1 parent 3dfea62 commit ace3ab6
Show file tree
Hide file tree
Showing 20 changed files with 1,301 additions and 722 deletions.
45 changes: 29 additions & 16 deletions cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,26 @@ import (
)

var (
ep = flag.String("entrypoint", "", "Original specified entrypoint to execute")
waitFile = flag.String("wait_file", "", "If specified, file to wait for")
postFile = flag.String("post_file", "", "If specified, file to write upon completion")
ep = flag.String("entrypoint", "", "Original specified entrypoint to execute")
waitFile = flag.String("wait_file", "", "If specified, file to wait for")
waitFileContent = flag.Bool("wait_file_content", false, "If specified, expect wait_file to have content")
postFile = flag.String("post_file", "", "If specified, file to write upon completion")

waitPollingInterval = time.Second
)

func main() {
flag.Parse()

e := entrypoint.Entrypointer{
Entrypoint: *ep,
WaitFile: *waitFile,
PostFile: *postFile,
Args: flag.Args(),
Waiter: &RealWaiter{},
Runner: &RealRunner{},
PostWriter: &RealPostWriter{},
Entrypoint: *ep,
WaitFile: *waitFile,
WaitFileContent: *waitFileContent,
PostFile: *postFile,
Args: flag.Args(),
Waiter: &RealWaiter{},
Runner: &RealRunner{},
PostWriter: &RealPostWriter{},
}
if err := e.Go(); err != nil {
switch err.(type) {
Expand Down Expand Up @@ -75,18 +79,27 @@ type RealWaiter struct{}

var _ entrypoint.Waiter = (*RealWaiter)(nil)

func (*RealWaiter) Wait(file string) error {
// Wait watches a file and returns when either a) the file exists and, if
// the expectContent argument is true, the file has non-zero size or b) there
// is an error polling the file.
//
// If the passed-in file is an empty string then this function returns
// immediately.
//
// If a file of the same name with a ".err" extension exists then this Wait
// will end with a skipError.
func (*RealWaiter) Wait(file string, expectContent bool) error {
if file == "" {
return nil
}
for ; ; time.Sleep(time.Second) {
// Watch for the post file
if _, err := os.Stat(file); err == nil {
return nil
for ; ; time.Sleep(waitPollingInterval) {
if info, err := os.Stat(file); err == nil {
if !expectContent || info.Size() > 0 {
return nil
}
} else if !os.IsNotExist(err) {
return xerrors.Errorf("Waiting for %q: %w", file, err)
}
// Watch for the post error file
if _, err := os.Stat(file + ".err"); err == nil {
return skipError("error file present, bail and skip the step")
}
Expand Down
106 changes: 106 additions & 0 deletions cmd/entrypoint/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package main

import (
"io/ioutil"
"os"
"testing"
"time"
)

// TestRealWaiterWaitMissingFile checks that Wait keeps blocking for at least
// two polling intervals while the watched file does not exist.
func TestRealWaiterWaitMissingFile(t *testing.T) {
	// Create a temp file and then immediately delete it to get
	// a legitimate tmp path and ensure the file doesn't exist
	// prior to testing Wait().
	tmp, err := ioutil.TempFile("", "real_waiter_test_file")
	if err != nil {
		// Fatalf, not Errorf: tmp is nil on failure, so continuing would
		// panic on tmp.Name() instead of reporting the real problem.
		t.Fatalf("error creating temp file: %v", err)
	}
	// Release the handle before removing; an open handle blocks removal on
	// some platforms and would otherwise leak for the rest of the test run.
	if err := tmp.Close(); err != nil {
		t.Fatalf("error closing temp file: %v", err)
	}
	if err := os.Remove(tmp.Name()); err != nil {
		// If the file is still present the test premise is broken.
		t.Fatalf("error removing temp file: %v", err)
	}
	rw := RealWaiter{}
	doneCh := make(chan struct{})
	// NOTE(review): Wait takes no cancellation signal, so on the success path
	// this goroutine presumably keeps polling after the test returns.
	go func() {
		if err := rw.Wait(tmp.Name(), false); err != nil {
			t.Errorf("error waiting on tmp file %q: %v", tmp.Name(), err)
		}
		close(doneCh)
	}()
	select {
	case <-doneCh:
		t.Errorf("did not expect Wait() to have detected a file at path %q", tmp.Name())
	case <-time.After(2 * waitPollingInterval):
		// Success
	}
}

// TestRealWaiterWaitWithFile checks that Wait returns promptly when the
// watched file already exists and no content is required.
func TestRealWaiterWaitWithFile(t *testing.T) {
	tmp, err := ioutil.TempFile("", "real_waiter_test_file")
	if err != nil {
		// Fatalf, not Errorf: tmp is nil on failure, so continuing would
		// panic on tmp.Name() instead of reporting the real problem.
		t.Fatalf("error creating temp file: %v", err)
	}
	defer os.Remove(tmp.Name())
	// Only the path is needed from here on, so release the handle now.
	if err := tmp.Close(); err != nil {
		t.Fatalf("error closing temp file: %v", err)
	}
	rw := RealWaiter{}
	doneCh := make(chan struct{})
	go func() {
		if err := rw.Wait(tmp.Name(), false); err != nil {
			t.Errorf("error waiting on tmp file %q: %v", tmp.Name(), err)
		}
		close(doneCh)
	}()
	select {
	case <-doneCh:
		// Success
	case <-time.After(2 * waitPollingInterval):
		t.Errorf("expected Wait() to have detected the file's existence by now")
	}
}

// TestRealWaiterWaitMissingContent checks that, when content is expected,
// Wait keeps blocking for at least two polling intervals on a file that
// exists but is empty.
func TestRealWaiterWaitMissingContent(t *testing.T) {
	tmp, err := ioutil.TempFile("", "real_waiter_test_file")
	if err != nil {
		// Fatalf, not Errorf: tmp is nil on failure, so continuing would
		// panic on tmp.Name() instead of reporting the real problem.
		t.Fatalf("error creating temp file: %v", err)
	}
	defer os.Remove(tmp.Name())
	// Only the path is needed from here on, so release the handle now.
	if err := tmp.Close(); err != nil {
		t.Fatalf("error closing temp file: %v", err)
	}
	rw := RealWaiter{}
	doneCh := make(chan struct{})
	// NOTE(review): Wait takes no cancellation signal, so on the success path
	// this goroutine presumably keeps polling after the test returns.
	go func() {
		if err := rw.Wait(tmp.Name(), true); err != nil {
			t.Errorf("error waiting on tmp file %q: %v", tmp.Name(), err)
		}
		close(doneCh)
	}()
	select {
	case <-doneCh:
		t.Errorf("no data was written to tmp file, did not expect Wait() to have detected a non-zero file size and returned")
	case <-time.After(2 * waitPollingInterval):
		// Success
	}
}

// TestRealWaiterWaitWithContent checks that, when content is expected, Wait
// returns within two polling intervals once data has been written to the
// watched file.
func TestRealWaiterWaitWithContent(t *testing.T) {
	tmp, err := ioutil.TempFile("", "real_waiter_test_file")
	if err != nil {
		// Fatalf, not Errorf: tmp is nil on failure, so continuing would
		// panic on tmp.Name() instead of reporting the real problem.
		t.Fatalf("error creating temp file: %v", err)
	}
	defer os.Remove(tmp.Name())
	// The content is written by path below, so release this handle now.
	if err := tmp.Close(); err != nil {
		t.Fatalf("error closing temp file: %v", err)
	}
	rw := RealWaiter{}
	doneCh := make(chan struct{})
	go func() {
		if err := rw.Wait(tmp.Name(), true); err != nil {
			t.Errorf("error waiting on tmp file %q: %v", tmp.Name(), err)
		}
		close(doneCh)
	}()
	// Write after Wait has started so the waiter may first observe the empty
	// file and must pick up the content on a later poll.
	if err := ioutil.WriteFile(tmp.Name(), []byte("😺"), 0700); err != nil {
		// Without content Wait can never return; stop here rather than
		// sitting through the timeout and reporting a misleading failure.
		t.Fatalf("error writing content to temp file: %v", err)
	}
	select {
	case <-doneCh:
		// Success
	case <-time.After(2 * waitPollingInterval):
		t.Errorf("expected Wait() to have detected a non-zero file size by now")
	}
}
6 changes: 6 additions & 0 deletions cmd/nop/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

// The nop command is a no-op, it simply prints a message and exits. Nop
// is used to stop sidecar containers in TaskRun Pods. When a Task's Steps
// are complete any sidecars running alongside the Step containers need
// to be terminated. Whatever image the sidecars are running is replaced
// with nop and the sidecar quickly exits.

package main

import "fmt"
Expand Down
30 changes: 30 additions & 0 deletions docs/developers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ manage the execution order of the containers. The `entrypoint` binary has the
following arguments:

- `wait_file` - If specified, file to wait for
- `wait_file_content` - If specified, wait until the file has non-zero size
- `post_file` - If specified, file to write upon completion
- `entrypoint` - The command to run in the image being wrapped

Expand All @@ -155,3 +156,32 @@ such as the following:
- The environment variable HOME is set to `/builder/home`, used by the builder
tools and injected into all of the step containers
- Default location for output-images `/builder/output-images`

## Handling of injected sidecars

Tekton has to take some special steps to support sidecars that are injected into
TaskRun Pods. Without intervention sidecars will typically run for the entire
lifetime of a Pod but in Tekton's case it's desirable for the sidecars to run
only as long as Steps take to complete. There's also a need for Tekton to
schedule the sidecars to start before a Task's Steps begin, just in case the
Steps rely on a sidecar's behaviour, for example to join an Istio service mesh.
To handle all of this, Tekton Pipelines implements the following lifecycle
for sidecar containers:

First, the [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/#the-downward-api)
is used to project an annotation on the TaskRun's Pod into the `entrypoint`
container as a file. The annotation starts as an empty string, so the file
projected by the downward API has zero length. The entrypointer spins, waiting
for that file to have non-zero size.

The sidecar containers start up. Once they're all in a ready state, the
annotation is populated with the string "READY", which in turn populates the
Downward API projected file. The entrypoint binary recognizes
that the projected file has a non-zero size and allows the Task's steps to
begin.

On completion of all steps in a Task the TaskRun reconciler stops any
sidecar containers. The `Image` field of any sidecar containers is swapped
to the nop image. Kubernetes observes the change and relaunches the container
with the updated container image. The nop container image exits. The container
is considered `Terminated` by Kubernetes and the TaskRun's Pod stops.
24 changes: 24 additions & 0 deletions docs/taskruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ A `TaskRun` runs until all `steps` have completed or until a failure occurs.
- [Steps](#steps)
- [Cancelling a TaskRun](#cancelling-a-taskrun)
- [Examples](#examples)
- [Sidecars](#sidecars)
- [Logs](logs.md)

---
Expand Down Expand Up @@ -544,6 +545,29 @@ of the `Task` resource object.
For examples and more information about specifying service accounts, see the
[`ServiceAccount`](./auth.md) reference topic.

## Sidecars

A well-established pattern in Kubernetes is that of the "sidecar" - a
container which runs alongside your workloads to provide ancillary support.
Typical examples of the sidecar pattern are logging daemons, services to
update files on a shared volume, and network proxies.

Tekton doesn't provide a mechanism to specify sidecars for Task steps
but it's still possible for sidecars to be added to your Pods:
[Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/)
provide cluster admins a mechanism to inject sidecar containers as Pods launch.
As a concrete example this is one possible method [used by Istio](https://istio.io/docs/setup/kubernetes/additional-setup/sidecar-injection/#automatic-sidecar-injection)
to inject an Envoy proxy into pods so that they can be included as part of
Istio's service mesh.

Tekton will happily work with sidecars injected into a TaskRun's
pods but the behaviour is a bit nuanced: when a TaskRun's steps are complete
any sidecar containers running inside the Pod will be terminated. In
order to terminate the sidecars they will be restarted with a new
"nop" image that quickly exits. The result will be that your TaskRun's
Pod will include the sidecar container with a Restart Count of 1 and
with a different container image than you might be expecting.

---

Except as otherwise noted, the content of this page is licensed under the
Expand Down
7 changes: 5 additions & 2 deletions pkg/entrypoint/entrypointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ type Entrypointer struct {
// WaitFile is the file to wait for. If not specified, execution begins
// immediately.
WaitFile string
// WaitFileContent indicates the WaitFile should have non-zero size
// before continuing with execution.
WaitFileContent bool
// PostFile is the file to write when complete. If not specified, no
// file is written.
PostFile string
Expand All @@ -45,7 +48,7 @@ type Entrypointer struct {
// Waiter encapsulates waiting for files to exist.
type Waiter interface {
// Wait blocks until the specified file exists.
Wait(file string) error
Wait(file string, expectContent bool) error
}

// Runner encapsulates running commands.
Expand All @@ -63,7 +66,7 @@ type PostWriter interface {
// post file.
func (e Entrypointer) Go() error {
if e.WaitFile != "" {
if err := e.Waiter.Wait(e.WaitFile); err != nil {
if err := e.Waiter.Wait(e.WaitFile, e.WaitFileContent); err != nil {
// An error happened while waiting, so we bail
// *but* we write postfile to make next steps bail too
e.WritePostFile(e.PostFile, err)
Expand Down
4 changes: 2 additions & 2 deletions pkg/entrypoint/entrypointer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func TestEntrypointer(t *testing.T) {

type fakeWaiter struct{ waited *string }

func (f *fakeWaiter) Wait(file string) error {
func (f *fakeWaiter) Wait(file string, expectContent bool) error {
f.waited = &file
return nil
}
Expand All @@ -193,7 +193,7 @@ func (f *fakePostWriter) Write(file string) { f.wrote = &file }

type fakeErrorWaiter struct{ waited *string }

func (f *fakeErrorWaiter) Wait(file string) error {
func (f *fakeErrorWaiter) Wait(file string, expectContent bool) error {
f.waited = &file
return xerrors.New("waiter failed")
}
Expand Down
Loading

0 comments on commit ace3ab6

Please sign in to comment.