diff --git a/cmd/openshift-install/create.go b/cmd/openshift-install/create.go index f9ae4c6bb39..7b866ef609e 100644 --- a/cmd/openshift-install/create.go +++ b/cmd/openshift-install/create.go @@ -95,7 +95,7 @@ var ( } err = waitForBootstrapComplete(ctx, config, rootOpts.dir) - if err != nil { + if err != nil || true { if err2 := logClusterOperatorConditions(ctx, config); err2 != nil { logrus.Error("Attempted to gather ClusterOperator status after installation failure: ", err2) } diff --git a/cmd/openshift-install/gather.go b/cmd/openshift-install/gather.go index decce1a4154..0faba1a5156 100644 --- a/cmd/openshift-install/gather.go +++ b/cmd/openshift-install/gather.go @@ -3,9 +3,11 @@ package main import ( "context" "fmt" + "io/ioutil" "os" "path/filepath" "strings" + "syscall" "time" configv1 "github.com/openshift/api/config/v1" @@ -18,13 +20,14 @@ import ( "github.com/openshift/installer/pkg/asset/installconfig" assetstore "github.com/openshift/installer/pkg/asset/store" + gatheraws "github.com/openshift/installer/pkg/gather/aws" "github.com/openshift/installer/pkg/gather/ssh" "github.com/openshift/installer/pkg/terraform" - gatheraws "github.com/openshift/installer/pkg/terraform/gather/aws" - gatherazure "github.com/openshift/installer/pkg/terraform/gather/azure" - gathergcp "github.com/openshift/installer/pkg/terraform/gather/gcp" - gatherlibvirt "github.com/openshift/installer/pkg/terraform/gather/libvirt" - gatheropenstack "github.com/openshift/installer/pkg/terraform/gather/openstack" + terraformgatheraws "github.com/openshift/installer/pkg/terraform/gather/aws" + terraformgatherazure "github.com/openshift/installer/pkg/terraform/gather/azure" + terraformgathergcp "github.com/openshift/installer/pkg/terraform/gather/gcp" + terraformgatherlibvirt "github.com/openshift/installer/pkg/terraform/gather/libvirt" + terraformgatheropenstack "github.com/openshift/installer/pkg/terraform/gather/openstack" "github.com/openshift/installer/pkg/types" awstypes 
"github.com/openshift/installer/pkg/types/aws" azuretypes "github.com/openshift/installer/pkg/types/azure" @@ -111,17 +114,31 @@ func runGatherBootstrapCmd(directory string) error { return errors.Wrapf(err, "failed to get bootstrap and control plane host addresses from %q", tfStateFilePath) } - return logGatherBootstrap(bootstrap, port, masters, directory) + err = logGatherBootstrap(bootstrap, port, masters, directory) + if err != nil || true { + // if errno, ok := errors.Cause(err).(syscall.Errno); ok && errno == syscall.ECONNREFUSED { + err2 := gatherConsoleLogs(context.TODO(), config, bootstrap, directory) + if err2 != nil { + logrus.Error(err2) + } + // } + } + + return err } func logGatherBootstrap(bootstrap string, port int, masters []string, directory string) error { logrus.Info("Pulling debug logs from the bootstrap machine") client, err := ssh.NewClient("core", fmt.Sprintf("%s:%d", bootstrap, port), gatherBootstrapOpts.sshKeys) - if err != nil && len(gatherBootstrapOpts.sshKeys) == 0 { - return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key") - } else if err != nil { + if err != nil { + if errno, ok := err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED { + return errors.Wrap(err, "failed to connect to the bootstrap machine") + } else if len(gatherBootstrapOpts.sshKeys) == 0 { + return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key") + } return errors.Wrap(err, "failed to create SSH client") } + gatherID := time.Now().Format("20060102150405") if err := ssh.Run(client, fmt.Sprintf("/usr/local/bin/installer-gather.sh --id %s %s", gatherID, strings.Join(masters, " "))); err != nil { return errors.Wrap(err, "failed to run remote command") @@ -134,51 +151,81 @@ func logGatherBootstrap(bootstrap string, port int, masters []string, directory return nil } +func gatherConsoleLogs(ctx context.Context, installConfig 
*installconfig.InstallConfig, ip string, directory string) error { + var data []byte + platform := installConfig.Config.Platform.Name() + switch platform { + case awstypes.Name: + session, err := installConfig.AWS.Session(ctx) + if err != nil { + return err + } + + data, err = gatheraws.ConsoleLogs(ctx, session, ip) + if err != nil { + return err + } + default: + logrus.Debug("Unable to gather console logs on %q", platform) + return nil + } + + gatherID := time.Now().Format("20060102150405") + file := filepath.Join(directory, fmt.Sprintf("bootstrap-%s-console.log", gatherID)) + err := ioutil.WriteFile(file, data, 0666) + if err != nil { + return err + } + + logrus.Infof("Bootstrap gather logs captured here %q", file) + return nil +} + func extractHostAddresses(config *types.InstallConfig, tfstate *terraform.State) (bootstrap string, port int, masters []string, err error) { port = 22 switch config.Platform.Name() { case awstypes.Name: - bootstrap, err = gatheraws.BootstrapIP(tfstate) + bootstrap, err = terraformgatheraws.BootstrapIP(tfstate) if err != nil { return bootstrap, port, masters, err } - masters, err = gatheraws.ControlPlaneIPs(tfstate) + masters, err = terraformgatheraws.ControlPlaneIPs(tfstate) if err != nil { logrus.Error(err) } case azuretypes.Name: - bootstrap, err = gatherazure.BootstrapIP(tfstate) + bootstrap, err = terraformgatherazure.BootstrapIP(tfstate) if err != nil { return bootstrap, port, masters, err } - masters, err = gatherazure.ControlPlaneIPs(tfstate) + masters, err = terraformgatherazure.ControlPlaneIPs(tfstate) if err != nil { logrus.Error(err) } case gcptypes.Name: - bootstrap, err = gathergcp.BootstrapIP(tfstate) + bootstrap, err = terraformgathergcp.BootstrapIP(tfstate) if err != nil { return bootstrap, port, masters, err } - masters, err = gathergcp.ControlPlaneIPs(tfstate) + masters, err = terraformgathergcp.ControlPlaneIPs(tfstate) if err != nil { logrus.Error(err) } case libvirttypes.Name: - bootstrap, err = 
gatherlibvirt.BootstrapIP(tfstate) + bootstrap, err = terraformgatherlibvirt.BootstrapIP(tfstate) if err != nil { return bootstrap, port, masters, err } - masters, err = gatherlibvirt.ControlPlaneIPs(tfstate) + masters, err = terraformgatherlibvirt.ControlPlaneIPs(tfstate) if err != nil { logrus.Error(err) } case openstacktypes.Name: - bootstrap, err = gatheropenstack.BootstrapIP(tfstate) + bootstrap, err = terraformgatheropenstack.BootstrapIP(tfstate) if err != nil { return bootstrap, port, masters, err } - masters, err = gatheropenstack.ControlPlaneIPs(tfstate) + masters, err = terraformgatheropenstack.ControlPlaneIPs(tfstate) if err != nil { logrus.Error(err) } diff --git a/pkg/gather/aws/OWNERS b/pkg/gather/aws/OWNERS new file mode 100644 index 00000000000..6e59d685aa6 --- /dev/null +++ b/pkg/gather/aws/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - aws-approvers +reviewers: + - aws-reviewers diff --git a/pkg/gather/aws/console.go b/pkg/gather/aws/console.go new file mode 100644 index 00000000000..91c3454b131 --- /dev/null +++ b/pkg/gather/aws/console.go @@ -0,0 +1,68 @@ +// Package AWS provides AWS-specific tools for gathering debugging information. +package aws + +import ( + "context" + "encoding/base64" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/pkg/errors" +) + +// ConsoleLogs retrieves console logs from the AWS instance with the +// given IP address. 
+//
+// The instance is located with the EC2 "ip-address" filter, which matches
+// the public IPv4 address of an instance. The first instance in the
+// DescribeInstances results that carries an instance ID is used.
+//
+// The most recent console output is requested first. Per the EC2 API
+// reference, "latest" output is only available on Nitro-based instance
+// types, so when that request fails it is retried once for the regular
+// buffered output. The returned bytes are the base64-decoded console output.
+//
+// An error is returned when ip is empty, when no matching instance is
+// found, when an EC2 call fails, or when the output is missing or is not
+// valid base64.
+func ConsoleLogs(ctx context.Context, session *session.Session, ip string) ([]byte, error) {
+	if ip == "" {
+		// An empty filter value cannot match an instance; fail before
+		// spending an API call on it.
+		return nil, errors.New("no IP address given to look up the AWS instance")
+	}
+
+	client := ec2.New(session)
+	var instanceID string
+	err := client.DescribeInstancesPagesWithContext(
+		ctx,
+		&ec2.DescribeInstancesInput{
+			Filters: []*ec2.Filter{{
+				Name:   aws.String("ip-address"),
+				Values: []*string{&ip},
+			}},
+		},
+		func(results *ec2.DescribeInstancesOutput, lastPage bool) bool {
+			for _, reservation := range results.Reservations {
+				for _, instance := range reservation.Instances {
+					if instance.InstanceId != nil {
+						instanceID = *instance.InstanceId
+						// Returning false stops the pager: one match is enough.
+						return false
+					}
+				}
+			}
+
+			return !lastPage
+		},
+	)
+	if err != nil {
+		return nil, errors.Wrap(err, "describe instances")
+	}
+
+	if instanceID == "" {
+		return nil, errors.Errorf("unable to find an AWS instance ID for %q", ip)
+	}
+
+	input := &ec2.GetConsoleOutputInput{
+		InstanceId: &instanceID,
+		Latest:     aws.Bool(true),
+	}
+	consoleOutput, err := client.GetConsoleOutputWithContext(ctx, input)
+	if err != nil && ctx.Err() == nil {
+		// Instance types that do not support "latest" output reject the
+		// request above. Fall back to the buffered output rather than
+		// giving up; if the failure had another cause, the retry reports it.
+		input.Latest = nil
+		consoleOutput, err = client.GetConsoleOutputWithContext(ctx, input)
+	}
+	if err != nil {
+		return nil, errors.Wrapf(err, "get console output for %s", instanceID)
+	}
+	if consoleOutput.Output == nil {
+		return nil, errors.Errorf("nil console output for %s", instanceID)
+	}
+
+	// The EC2 API hands the console output back base64-encoded.
+	data, err := base64.StdEncoding.DecodeString(*consoleOutput.Output)
+	if err != nil {
+		return nil, errors.Wrapf(err, "decoding console output for %s", instanceID)
+	}
+
+	return data, nil
+}