Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: support debug shell for advanced development #9201

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ ARG PKG_CNI
ARG PKG_FLANNEL_CNI
ARG PKG_TALOSCTL_CNI_BUNDLE_INSTALL

ARG DEBUG_TOOLS_SOURCE
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not used anywhere below, so all the debug stuff would get copied always which we don't want


# Resolve package images using ${PKGS} to be used later in COPY --from=.

FROM ${PKG_FHS} AS pkg-fhs
Expand Down Expand Up @@ -140,6 +142,23 @@ FROM ${PKG_KERNEL} AS pkg-kernel
FROM --platform=amd64 ${PKG_KERNEL} AS pkg-kernel-amd64
FROM --platform=arm64 ${PKG_KERNEL} AS pkg-kernel-arm64

FROM --platform=amd64 ${TOOLS} as tools-amd64
FROM --platform=arm64 ${TOOLS} as tools-arm64

FROM scratch as pkg-debug-tools-amd64
COPY --from=tools-amd64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-amd64 /toolchain/lib/ld-musl-x86_64.so.1 /toolchain/toolchain/lib/ld-musl-x86_64.so.1
COPY --from=tools-amd64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-amd64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-amd64 /toolchain/bin/tee /toolchain/bin/tee

FROM scratch as pkg-debug-tools-arm64
COPY --from=tools-arm64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-arm64 /toolchain/lib/ld-musl-aarch64.so.1 /toolchain/toolchain/lib/ld-musl-aarch64.so.1
COPY --from=tools-arm64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-arm64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-arm64 /toolchain/bin/tee /toolchain/bin/tee

# Strip CNI package.

FROM scratch AS pkg-cni-stripped-amd64
Expand Down Expand Up @@ -651,6 +670,10 @@ COPY --link --from=pkg-kmod-amd64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-amd64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-amd64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-amd64 /machined /rootfs/sbin/init

# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-amd64 * /rootfs/

RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff
Expand Down Expand Up @@ -721,6 +744,10 @@ COPY --link --from=pkg-kmod-arm64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-arm64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-arm64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-arm64 /machined /rootfs/sbin/init

# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-arm64 * /rootfs/

RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ CI_RELEASE_TAG := $(shell git log --oneline --format=%B -n 1 HEAD^2 -- 2>/dev/nu
ARTIFACTS := _out
TOOLS ?= ghcr.io/siderolabs/tools:v1.9.0-alpha.0-3-g1151610

DEBUG_TOOLS_SOURCE := scratch

PKGS_PREFIX ?= ghcr.io/siderolabs
PKGS ?= v1.9.0-alpha.0-18-gba0341e
EXTRAS ?= v1.9.0-alpha.0
Expand Down Expand Up @@ -146,6 +148,11 @@ else
GO_LDFLAGS += -s -w
endif

ifneq (, $(filter $(WITH_DEBUG_SHELL), t true TRUE y yes 1))
# bash-minimal is a Dockerfile target that copies over the bash from siderolabs tools
DEBUG_TOOLS_SOURCE := bash-minimal
endif

GO_BUILDFLAGS_TALOSCTL := $(GO_BUILDFLAGS) -tags "$(GO_BUILDTAGS_TALOSCTL)"
GO_BUILDFLAGS += -tags "$(GO_BUILDTAGS)"

Expand All @@ -160,6 +167,7 @@ COMMON_ARGS += --progress=$(PROGRESS)
COMMON_ARGS += --platform=$(PLATFORM)
COMMON_ARGS += --push=$(PUSH)
COMMON_ARGS += --build-arg=TOOLS=$(TOOLS)
COMMON_ARGS += --build-arg=DEBUG_TOOLS_SOURCE=$(DEBUG_TOOLS_SOURCE)
dsseng marked this conversation as resolved.
Show resolved Hide resolved
COMMON_ARGS += --build-arg=PKGS=$(PKGS)
COMMON_ARGS += --build-arg=EXTRAS=$(EXTRAS)
COMMON_ARGS += --build-arg=GOFUMPT_VERSION=$(GOFUMPT_VERSION)
Expand Down
26 changes: 26 additions & 0 deletions cmd/talosctl/cmd/mgmt/cluster/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ const (
controlPlanePortFlag = "control-plane-port"
firewallFlag = "with-firewall"
tpm2EnabledFlag = "with-tpm2"
withDebugShellFlag = "with-debug-shell"

// The following flags are the gen options - the options that are only used in machine configuration (i.e., not during the qemu/docker provisioning).
// They are not applicable when no machine configuration is generated, hence mutually exclusive with the --input-dir flag.
Expand Down Expand Up @@ -188,6 +189,7 @@ var (
withFirewall string
withUUIDHostnames bool
withSiderolinkAgent agentFlag
debugShellEnabled bool
)

// createCmd represents the cluster up command.
Expand Down Expand Up @@ -468,13 +470,20 @@ func create(ctx context.Context) error {
provision.WithBootlader(bootloaderEnabled),
provision.WithUEFI(uefiEnabled),
provision.WithTPM2(tpm2Enabled),
provision.WithDebugShell(debugShellEnabled),
provision.WithExtraUEFISearchPaths(extraUEFISearchPaths),
provision.WithTargetArch(targetArch),
provision.WithSiderolinkAgent(withSiderolinkAgent.IsEnabled()),
}

var configBundleOpts []bundle.Option

if debugShellEnabled {
if provisionerName != "qemu" {
return errors.New("debug shell only supported with qemu provisioner")
}
}

if ports != "" {
if provisionerName != docker {
return errors.New("exposed-ports flag only supported with docker provisioner")
Expand Down Expand Up @@ -934,6 +943,21 @@ func create(ctx context.Context) error {
return err
}

if debugShellEnabled {
fmt.Println("You can now connect to debug shell on any node using these commands:")

for _, node := range request.Nodes {
talosDir, err := clientconfig.GetTalosDirectory()
if err != nil {
return nil
}

fmt.Printf("socat - UNIX-CONNECT:%s\n", filepath.Join(talosDir, "clusters", clusterName, node.Name+".serial"))
}

return nil
}

// No talosconfig in the bundle - skip the operations below
if bundleTalosconfig == nil {
return nil
Expand Down Expand Up @@ -1172,6 +1196,8 @@ func init() {
createCmd.Flags().BoolVar(&bootloaderEnabled, bootloaderEnabledFlag, true, "enable bootloader to load kernel and initramfs from disk image after install")
createCmd.Flags().BoolVar(&uefiEnabled, "with-uefi", true, "enable UEFI on x86_64 architecture")
createCmd.Flags().BoolVar(&tpm2Enabled, tpm2EnabledFlag, false, "enable TPM2 emulation support using swtpm")
createCmd.Flags().BoolVar(&debugShellEnabled, withDebugShellFlag, false, "drop talos into a maintenance shell on boot, this is for advanced debugging for developers only")
createCmd.Flags().MarkHidden("with-debug-shell") //nolint:errcheck
createCmd.Flags().StringSliceVar(&extraUEFISearchPaths, "extra-uefi-search-paths", []string{}, "additional search paths for UEFI firmware (only applies when UEFI is enabled)")
createCmd.Flags().StringSliceVar(&registryMirrors, registryMirrorFlag, []string{}, "list of registry mirrors to use in format: <registry host>=<mirror URL>")
createCmd.Flags().StringSliceVar(&registryInsecure, registryInsecureFlag, []string{}, "list of registry hostnames to skip TLS verification for")
Expand Down
11 changes: 11 additions & 0 deletions internal/pkg/mount/switchroot/switchroot.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"path/filepath"

"github.com/siderolabs/go-debug"
"github.com/siderolabs/go-procfs/procfs"
"golang.org/x/sys/unix"

"github.com/siderolabs/talos/internal/pkg/mount"
Expand All @@ -28,6 +29,8 @@ var preservedPaths = map[string]struct{}{

// Switch moves the rootfs to a specified directory. See
// https://github.com/karelzak/util-linux/blob/master/sys-utils/switch_root.c.
//
//nolint:gocyclo
func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Println("moving mounts to the new rootfs")

Expand Down Expand Up @@ -87,6 +90,14 @@ func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Printf("race detection enabled with halt_on_error=1")
}

if val := procfs.ProcCmdline().Get("talos.debugshell"); val != nil {
if err = unix.Exec("/bin/bash", []string{"/bin/bash"}, envv); err != nil {
return fmt.Errorf("error executing /bin/bash: %w", err)
}

return nil
}

if err = unix.Exec("/sbin/init", []string{"/sbin/init"}, envv); err != nil {
return fmt.Errorf("error executing /sbin/init: %w", err)
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/provision/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ func WithTPM2(enabled bool) Option {
}
}

// WithDebugShell builds an Option that stores enabled in Options.WithDebugShell.
//
// The returned Option only assigns that field and always reports a nil error.
func WithDebugShell(enabled bool) Option {
	setDebugShell := func(opts *Options) error {
		opts.WithDebugShell = enabled

		return nil
	}

	return setDebugShell
}

// WithExtraUEFISearchPaths configures additional search paths to look for UEFI firmware.
func WithExtraUEFISearchPaths(extraUEFISearchPaths []string) Option {
return func(o *Options) error {
Expand Down Expand Up @@ -157,6 +166,8 @@ type Options struct {
UEFIEnabled bool
// Enable TPM2 emulation using swtpm.
TPM2Enabled bool
// Drop into a debug shell instead of running init when switching to the root filesystem.
WithDebugShell bool
// Configure additional search paths to look for UEFI firmware.
ExtraUEFISearchPaths []string

Expand Down
9 changes: 9 additions & 0 deletions pkg/provision/providers/qemu/launch.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type LaunchConfig struct {
NodeUUID uuid.UUID
BadRTC bool
ArchitectureData Arch
WithDebugShell bool

// Talos config
Config string
Expand Down Expand Up @@ -320,6 +321,14 @@ func launchVM(config *LaunchConfig) error {
"pause",
}

if config.WithDebugShell {
args = append(
args,
"-serial",
fmt.Sprintf("unix:%s/%s.serial,server,nowait", config.StatePath, config.Hostname),
)
}

var (
scsiAttached, ahciAttached, nvmeAttached bool
ahciBus int
Expand Down
5 changes: 5 additions & 0 deletions pkg/provision/providers/qemu/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
}
}

if opts.WithDebugShell {
cmdline.Append("talos.debugshell", "")
}

var nodeConfig string

if !nodeReq.SkipInjectingConfig {
Expand Down Expand Up @@ -157,6 +161,7 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
TFTPServer: nodeReq.TFTPServer,
IPXEBootFileName: nodeReq.IPXEBootFilename,
APIPort: apiPort,
WithDebugShell: opts.WithDebugShell,
}

if clusterReq.IPXEBootScript != "" {
Expand Down
Loading