From 1bc6ef69ba307c53653e61b1e545a261af668d35 Mon Sep 17 00:00:00 2001 From: Abdhesh Nayak Date: Mon, 19 Jun 2023 11:45:17 +0530 Subject: [PATCH 1/4] wip --- .../nodectrl/internal/domain/aws-spot/main.go | 473 ++++++++++++++++++ apps/nodectrl/internal/domain/aws/main.go | 98 ++-- .../internal/domain/provider-client-fx.go | 17 + apps/nodectrl/internal/env/env.go | 3 - apps/nodectrl/terraform/aws-spot/init.sh | 10 + apps/nodectrl/terraform/aws-spot/resource.tf | 179 +++++++ apps/nodectrl/terraform/aws-spot/variables.tf | 23 + apps/nodectrl/terraform/aws/resource.tf | 1 - 8 files changed, 752 insertions(+), 52 deletions(-) create mode 100644 apps/nodectrl/internal/domain/aws-spot/main.go create mode 100644 apps/nodectrl/terraform/aws-spot/init.sh create mode 100644 apps/nodectrl/terraform/aws-spot/resource.tf create mode 100644 apps/nodectrl/terraform/aws-spot/variables.tf diff --git a/apps/nodectrl/internal/domain/aws-spot/main.go b/apps/nodectrl/internal/domain/aws-spot/main.go new file mode 100644 index 000000000..0d5c916d3 --- /dev/null +++ b/apps/nodectrl/internal/domain/aws-spot/main.go @@ -0,0 +1,473 @@ +package awsspot + +import ( + "context" + "fmt" + "os" + "path" + "strings" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/aws" + "kloudlite.io/apps/nodectrl/internal/domain/common" + "kloudlite.io/apps/nodectrl/internal/domain/utils" + awss3 "kloudlite.io/pkg/aws-s3" +) + +type spotNodeConfig struct { + ServerIP string `yaml:"serverIp"` + Token string `yaml:"token"` + NodeName string `yaml:"nodeName"` + Taints []string `yaml:"taints"` + Labels map[string]string `yaml:"labels"` +} + +type awsSpotClient struct { + node aws.AWSNode + awsS3Client awss3.AwsS3 + + accessKey string + accessSecret string + accountName string + accountId string + + tfTemplates string + labels map[string]string + taints []string +} + +// AddMaster implements common.ProviderClient. +func (a awsSpotClient) AddMaster(ctx context.Context) error { + // fetch token + sshPath := path.Join("/tmp/ssh", a.accountName) + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { + return e + } + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := aws.TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + if err := a.writeSpotNode(kc); err != nil { + return err + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + // create node and wait for ready + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%v/access", sshPath), + string(ip), + ), + "checking if node is ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + // attach to cluster as master + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", + sshPath, + string(ip), + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + string(ip), + a.node.NodeId, + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { + return err + } + + return nil +} + +func (a awsSpotClient) AddWorker(ctx context.Context) error { + // fetch token + + sshPath := path.Join("/tmp/ssh", a.accountName) + + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { + return e + } + } + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := aws.TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + if err := a.writeSpotNode(kc); err != nil { + return err + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + // create node and wait for ready + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%s/access", sshPath), + string(ip), + ), + "checking if node ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + labels := func() []string { + l := []string{} + for k, v := range map[string]string{ + "kloudlite.io/public-ip": string(ip), + } { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + + for k, v := range a.labels { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + return l + }() + + // attach to cluster as workernode + + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", + sshPath, + ip, + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + ip, + a.node.NodeId, + strings.Join(labels, " "), + func() string { + if a.node.IsGpu { + // return "--docker" + // return "--docker" + return "" + } + return "" + }(), + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { + return err + } + + return nil +} + +func (a awsSpotClient) SetupSSH() error { + const sshDir = "/tmp/ssh" + + if _, err := os.Stat(sshDir); err != nil { + err := os.Mkdir(sshDir, os.ModePerm) + if err != nil { + return err + } + } + + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) + + if err := a.awsS3Client.IsFileExists(fileName); err != nil { + + if _, err := os.Stat(destDir); err == nil { + if err := os.RemoveAll(destDir); err != nil { + return err + } + } + + if e := os.Mkdir(destDir, os.ModePerm); e != nil { + return e + } + + privateKeyBytes, publicKeyBytes, err := utils.GenerateKeys() + if err != nil { + return err + } + + if err := os.WriteFile(fmt.Sprintf("%s/access.pub", destDir), publicKeyBytes, os.ModePerm); err != nil { + return err + } + + if err := os.WriteFile(fmt.Sprintf("%s/access", destDir), privateKeyBytes, 0400); err != nil { + return err + } + return nil + } + + if err := os.RemoveAll(destDir); err != nil { + return err + } + + err := a.awsS3Client.DownloadFile(path.Join(sshDir, fileName), fileName) + if err != nil { + return err + } + + _, err = utils.Unzip(path.Join(sshDir, fileName), sshDir) + if err != nil { + return err + } + + return nil +} + +func (a awsSpotClient) saveForSure() error { + count := 0 + for { + if err := a.saveSSH(); err == nil { + return nil + } + if count >= 10 { + return fmt.Errorf("coudn't save the state") + } + + time.Sleep(time.Second * 20) + count++ + } +} + +func (a awsSpotClient) saveSSH() error { + const sshDir = "/tmp/ssh" + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) + + if err := utils.ZipSource(destDir, path.Join(sshDir, fileName)); err != nil { + return err + } + + if err := a.awsS3Client.UploadFile(path.Join(sshDir, fileName), fileName); err != nil { + return err + } + + return nil +} + +// CreateCluster implements common.ProviderClient +func (a awsSpotClient) CreateCluster(ctx context.Context) error { + return fmt.Errorf("you can't create cluster using aws spot for now") +} + +func parseValues(a awsSpotClient, sshPath string) map[string]string { + values := map[string]string{} + + values["access_key"] = a.accessKey + values["secret_key"] = a.accessSecret + + values["region"] = a.node.Region + values["node_id"] = a.node.NodeId + values["keys-path"] = sshPath + values["account_id"] = a.accountId + + // TODO: ami according to region + // ami is fixed for now + // values["ami"] = a.node.ImageId + + return values +} + +func (a awsSpotClient) SaveToDbGuranteed(ctx context.Context) { + for { + if err := utils.SaveToDb(a.node.NodeId, a.awsS3Client); err == nil { + break + } else { + fmt.Println(err) + } + time.Sleep(time.Second * 20) + } +} + +func (a awsSpotClient) writeSpotNode(kc aws.TokenAndKubeconfig) error { + const sshDir = "/tmp/ssh" + sshPath := path.Join(sshDir, a.accountName) + dataPath := path.Join(sshPath, "data.yaml") + + nConfig := spotNodeConfig{ + ServerIP: kc.ServerIp, + Token: kc.Token, + NodeName: a.node.NodeId, + Taints: []string{}, + Labels: map[string]string{}, + } + + out, err := yaml.Marshal(nConfig) + if err != nil { + return err + } + + return os.WriteFile(dataPath, out, os.ModePerm) +} + +// NewNode implements ProviderClient +func (a awsSpotClient) NewNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + + values := parseValues(a, sshPath) + + if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws-spot"), a.awsS3Client, true); err != nil { + return err + } + + defer a.SaveToDbGuranteed(ctx) + + // upload the final state to the db, upsert if db is already present + + // apply the tf file + if err := func() error { + if err := utils.InitTFdir(path.Join(utils.Workdir, a.node.NodeId)); err != nil { + return err + } + + if err := utils.ApplyTF(path.Join(utils.Workdir, a.node.NodeId), values); err != nil { + return err + } + + return nil + }(); err != nil { + return err + } + + return nil +} + +// DeleteNode implements ProviderClient +func (a awsSpotClient) DeleteNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + values := parseValues(a, sshPath) + + /* + steps: + - check if state present in db + - if present load that to working dir + - else initialize new tf dir + - destroy node with terraform + - delete final state + */ + + if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws-spot"), a.awsS3Client, false); err != nil { + return err + } + + // destroy the tf file + if err := func() error { + if err := utils.DestroyNode(a.node.NodeId, values); err != nil { + return err + } + + return nil + }(); err != nil { + return err + } + + return nil +} + +func NewAwsSpotProviderClient(node aws.AWSNode, cpd common.CommonProviderData, apc aws.AwsProviderConfig) (common.ProviderClient, error) { + awsS3Client, err := awss3.NewAwsS3Client(apc.AccessKey, apc.AccessSecret, apc.AccountName) + if err != nil { + return nil, err + } + + return awsSpotClient{ + node: node, + awsS3Client: awsS3Client, + + accessKey: apc.AccessKey, + accessSecret: apc.AccessSecret, + accountName: apc.AccountName, + accountId: apc.AccountId, + + tfTemplates: cpd.TfTemplates, + labels: cpd.Labels, + taints: cpd.Taints, + }, nil +} diff --git a/apps/nodectrl/internal/domain/aws/main.go b/apps/nodectrl/internal/domain/aws/main.go index fbdc2e17c..2e6d53c52 100644 --- a/apps/nodectrl/internal/domain/aws/main.go +++ b/apps/nodectrl/internal/domain/aws/main.go @@ -20,6 +20,7 @@ type AwsProviderConfig struct { AccessKey string `yaml:"accessKey"` AccessSecret string `yaml:"accessSecret"` AccountName string `yaml:"accountName"` + AccountId string `yaml:"accountId"` } type AWSNode struct { @@ -31,21 +32,21 @@ type AWSNode struct { IsGpu bool `yaml:"isGpu"` } -type awsClient struct { +type AwsClient struct { node AWSNode awsS3Client awss3.AwsS3 accessKey string accessSecret string - accountId string + accountName string - SSHPath string + // SSHPath string tfTemplates string labels map[string]string taints []string } -type tokenAndKubeconfig struct { +type TokenAndKubeconfig struct { Token string `json:"token"` Kubeconfig string `json:"kubeconfig"` ServerIp string `json:"serverIp"` @@ -53,23 +54,23 @@ type tokenAndKubeconfig struct { } // AddMaster implements common.ProviderClient. -func (a awsClient) AddMaster(ctx context.Context) error { +func (a AwsClient) AddMaster(ctx context.Context) error { // fetch token - a.SSHPath = path.Join("/tmp/ssh", a.accountId) + sshPath := path.Join("/tmp/ssh", a.accountName) - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountId) + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { return err } - if _, err := os.Stat(a.SSHPath); err != nil { - if e := os.Mkdir(a.SSHPath, os.ModePerm); e != nil { + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { return e } } - tokenPath := path.Join(a.SSHPath, "config.yaml") + tokenPath := path.Join(sshPath, "config.yaml") if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { return err } @@ -79,7 +80,7 @@ func (a awsClient) AddMaster(ctx context.Context) error { return err } - kc := tokenAndKubeconfig{} + kc := TokenAndKubeconfig{} if err := yaml.Unmarshal(b, &kc); err != nil { return err @@ -107,7 +108,7 @@ func (a awsClient) AddMaster(ctx context.Context) error { for { if e := utils.ExecCmd( fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", a.SSHPath), + fmt.Sprintf("%v/access", sshPath), string(ip), ), "checking if node is ready"); e == nil { @@ -124,7 +125,7 @@ func (a awsClient) AddMaster(ctx context.Context) error { // attach to cluster as master cmd := fmt.Sprintf( "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", - a.SSHPath, + sshPath, string(ip), kc.ServerIp, strings.TrimSpace(string(kc.Token)), @@ -139,24 +140,24 @@ func (a awsClient) AddMaster(ctx context.Context) error { return nil } -func (a awsClient) AddWorker(ctx context.Context) error { +func (a AwsClient) AddWorker(ctx context.Context) error { // fetch token - a.SSHPath = path.Join("/tmp/ssh", a.accountId) + sshPath := path.Join("/tmp/ssh", a.accountName) - if _, err := os.Stat(a.SSHPath); err != nil { - if e := os.Mkdir(a.SSHPath, os.ModePerm); e != nil { + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { return e } } - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountId) + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { return err } - tokenPath := path.Join(a.SSHPath, "config.yaml") + tokenPath := path.Join(sshPath, "config.yaml") if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { return err } @@ -166,7 +167,7 @@ func (a awsClient) AddWorker(ctx context.Context) error { return err } - kc := tokenAndKubeconfig{} + kc := TokenAndKubeconfig{} if err := yaml.Unmarshal(b, &kc); err != nil { return err @@ -194,7 +195,7 @@ func (a awsClient) AddWorker(ctx context.Context) error { for { if e := utils.ExecCmd( fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%s/access", a.SSHPath), + fmt.Sprintf("%s/access", sshPath), string(ip), ), "checking if node ready"); e == nil { @@ -226,7 +227,7 @@ func (a awsClient) AddWorker(ctx context.Context) error { cmd := fmt.Sprintf( "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", - a.SSHPath, + sshPath, ip, kc.ServerIp, strings.TrimSpace(string(kc.Token)), @@ -250,15 +251,15 @@ func (a awsClient) AddWorker(ctx context.Context) error { return nil } -func (a awsClient) SetupSSH() error { +func (a AwsClient) SetupSSH() error { const sshDir = "/tmp/ssh" if _, err := os.Stat(sshDir); err != nil { return os.Mkdir(sshDir, os.ModePerm) } - destDir := path.Join(sshDir, a.accountId) - fileName := fmt.Sprintf("%s.zip", a.accountId) + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) if err := a.awsS3Client.IsFileExists(fileName); err != nil { @@ -304,7 +305,7 @@ func (a awsClient) SetupSSH() error { return nil } -func (a awsClient) saveForSure() error { +func (a AwsClient) saveForSure() error { count := 0 for { if err := a.saveSSH(); err == nil { @@ -319,10 +320,10 @@ func (a awsClient) saveForSure() error { } } -func (a awsClient) saveSSH() error { +func (a AwsClient) saveSSH() error { const sshDir = "/tmp/ssh" - destDir := path.Join(sshDir, a.accountId) - fileName := fmt.Sprintf("%s.zip", a.accountId) + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) if err := utils.ZipSource(destDir, path.Join(sshDir, fileName)); err != nil { return err @@ -336,7 +337,7 @@ func (a awsClient) saveSSH() error { } // CreateCluster implements common.ProviderClient -func (a awsClient) CreateCluster(ctx context.Context) error { +func (a AwsClient) CreateCluster(ctx context.Context) error { /* create node check for rediness @@ -349,7 +350,7 @@ func (a awsClient) CreateCluster(ctx context.Context) error { return err } defer a.saveForSure() - a.SSHPath = path.Join("/tmp/ssh", a.accountId) + sshPath := path.Join("/tmp/ssh", a.accountName) if err := a.NewNode(ctx); err != nil { return err @@ -365,7 +366,7 @@ func (a awsClient) CreateCluster(ctx context.Context) error { for { if e := utils.ExecCmd( fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", a.SSHPath), + fmt.Sprintf("%v/access", sshPath), string(ip), ), "checking is node is ready"); e == nil { @@ -384,7 +385,7 @@ func (a awsClient) CreateCluster(ctx context.Context) error { // install k3s cmd := fmt.Sprintf( "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --token=%s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s --cluster-init", - a.SSHPath, + sshPath, string(ip), masterToken.String(), string(ip), @@ -396,7 +397,7 @@ func (a awsClient) CreateCluster(ctx context.Context) error { } // needed to fetch kubeconfig - configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", a.SSHPath, string(ip)), "fetching kubeconfig from the cluster") + configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", sshPath, string(ip)), "fetching kubeconfig from the cluster") if err != nil { return err } @@ -415,12 +416,12 @@ func (a awsClient) CreateCluster(ctx context.Context) error { return err } - tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", a.SSHPath, string(ip)), "fetching node token from the cluster") + tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", sshPath, string(ip)), "fetching node token from the cluster") if err != nil { return err } - st := tokenAndKubeconfig{ + st := TokenAndKubeconfig{ Token: string(tokenOut), Kubeconfig: string(kc), ServerIp: string(ip), @@ -432,20 +433,20 @@ func (a awsClient) CreateCluster(ctx context.Context) error { return err } - tokenPath := path.Join(a.SSHPath, "config.yaml") + tokenPath := path.Join(sshPath, "config.yaml") if err := os.WriteFile(tokenPath, b, os.ModePerm); err != nil { return err } - if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountId)); err != nil { + if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountName)); err != nil { return err } return err } -func parseValues(a awsClient) map[string]string { +func parseValues(a AwsClient, sshPath string) map[string]string { values := map[string]string{} values["access_key"] = a.accessKey @@ -454,13 +455,13 @@ func parseValues(a awsClient) map[string]string { values["region"] = a.node.Region values["node_id"] = a.node.NodeId values["instance_type"] = a.node.InstanceType - values["keys-path"] = a.SSHPath + values["keys-path"] = sshPath values["ami"] = a.node.ImageId return values } -func (a awsClient) SaveToDbGuranteed(ctx context.Context) { +func (a AwsClient) SaveToDbGuranteed(ctx context.Context) { for { if err := utils.SaveToDb(a.node.NodeId, a.awsS3Client); err == nil { break @@ -472,8 +473,9 @@ func (a awsClient) SaveToDbGuranteed(ctx context.Context) { } // NewNode implements ProviderClient -func (a awsClient) NewNode(ctx context.Context) error { - values := parseValues(a) +func (a AwsClient) NewNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + values := parseValues(a, sshPath) if true { if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws"), a.awsS3Client, true); err != nil { @@ -504,8 +506,9 @@ func (a awsClient) NewNode(ctx context.Context) error { } // DeleteNode implements ProviderClient -func (a awsClient) DeleteNode(ctx context.Context) error { - values := parseValues(a) +func (a AwsClient) DeleteNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + values := parseValues(a, sshPath) /* steps: @@ -540,17 +543,16 @@ func NewAwsProviderClient(node AWSNode, cpd common.CommonProviderData, apc AwsPr return nil, err } - return awsClient{ + return AwsClient{ node: node, awsS3Client: awsS3Client, accessKey: apc.AccessKey, accessSecret: apc.AccessSecret, - accountId: apc.AccountName, + accountName: apc.AccountName, tfTemplates: cpd.TfTemplates, labels: cpd.Labels, taints: cpd.Taints, - SSHPath: cpd.SSHPath, }, nil } diff --git a/apps/nodectrl/internal/domain/provider-client-fx.go b/apps/nodectrl/internal/domain/provider-client-fx.go index f1f7368da..8df2c5085 100644 --- a/apps/nodectrl/internal/domain/provider-client-fx.go +++ b/apps/nodectrl/internal/domain/provider-client-fx.go @@ -6,6 +6,7 @@ import ( "go.uber.org/fx" "kloudlite.io/apps/nodectrl/internal/domain/aws" + awsspot "kloudlite.io/apps/nodectrl/internal/domain/aws-spot" "kloudlite.io/apps/nodectrl/internal/domain/common" "kloudlite.io/apps/nodectrl/internal/domain/do" "kloudlite.io/apps/nodectrl/internal/domain/utils" @@ -44,6 +45,22 @@ var ProviderClientFx = fx.Module("provider-client-fx", } return aws.NewAwsProviderClient(node, cpd, apc) + + case "aws-spot": + + node := awsspot.AwsASpotNode{} + + if err := utils.Base64YamlDecode(env.NodeConfig, &node); err != nil { + return nil, err + } + + apc := aws.AwsProviderConfig{} + + if err := utils.Base64YamlDecode(env.AWSProviderConfig, &apc); err != nil { + return nil, err + } + + return awsspot.NewAwsSpotProviderClient(node, cpd, apc) case "azure": panic("not implemented") case "do": diff --git a/apps/nodectrl/internal/env/env.go b/apps/nodectrl/internal/env/env.go index 4cb170063..7197ebe2b 100644 --- a/apps/nodectrl/internal/env/env.go +++ b/apps/nodectrl/internal/env/env.go @@ -9,9 +9,6 @@ type Env struct { NodeConfig string `env:"NODE_CONFIG" required:"true"` ProviderConfig string `env:"PROVIDER_CONFIG" required:"true"` - // DBUrl string `env:"DB_URL" required:"true"` - // DBName string `env:"DB_NAME" required:"true"` - AWSProviderConfig string `env:"AWS_PROVIDER_CONFIG"` GCPProviderConfig string `env:"GCP_PROVIDER_CONFIG"` AzureProviderConfig string `env:"AZURE_PROVIDER_CONFIG"` diff --git a/apps/nodectrl/terraform/aws-spot/init.sh b/apps/nodectrl/terraform/aws-spot/init.sh new file mode 100644 index 000000000..2c2ad0a7c --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +mkdir /k3s +cat >> /k3s/data.yaml << EOF +${nodeConfigYaml} +EOF + +cat >> /root/.ssh/authorized_keys << EOF +${pubkey} +EOF diff --git a/apps/nodectrl/terraform/aws-spot/resource.tf b/apps/nodectrl/terraform/aws-spot/resource.tf new file mode 100644 index 000000000..9bd67348a --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/resource.tf @@ -0,0 +1,179 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.3.0" + } + } + required_version = ">= 1.2.0" +} + +provider "aws" { + region = var.region + access_key = var.access_key + secret_key = var.secret_key +} + +output "node-name" { + value = var.node_id +} + + +resource "aws_security_group" "sg" { + + name = "sg-${var.node_id}" + + ingress { + from_port = 22 + protocol = "tcp" + to_port = 22 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 2379 + protocol = "tcp" + to_port = 2379 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 2380 + protocol = "tcp" + to_port = 2380 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 6443 + protocol = "tcp" + to_port = 6443 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 8472 + protocol = "udp" + to_port = 8472 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 9100 + protocol = "tcp" + to_port = 9100 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 51820 + protocol = "udp" + to_port = 51820 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 51821 + protocol = "udp" + to_port = 51821 + cidr_blocks = ["0.0.0.0/0"] + } + + + ingress { + from_port = 10250 + protocol = "tcp" + to_port = 10250 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 80 + protocol = "tcp" + to_port = 80 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 443 + protocol = "tcp" + to_port = 443 + cidr_blocks = ["0.0.0.0/0"] + } + + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + +} + + +resource "aws_launch_template" "spot-template" { + name = var.node_id + image_id = "ami-0e63f370aa626048d" + + + user_data = base64encode(templatefile("./init.sh", { + pubkey = file("${var.keys-path}/access.pub") + nodeConfigYaml = file("${var.keys-path}/data.yaml") + # hostname = var.node_id + })) + + + + block_device_mappings { + device_name = "/dev/sda1" + ebs { + volume_size = 40 + } + } + + network_interfaces { + associate_public_ip_address = true + security_groups = [aws_security_group.sg.id] + } + + tag_specifications { + resource_type = "instance" + tags = { + Name = var.node_id + } + } +} + + + +resource "aws_spot_fleet_request" "byoc-spot-node" { + iam_fleet_role = "arn:aws:iam::${var.account_id}:role/aws-ec2-spot-fleet-tagging-role" + + target_capacity = 1 + + terminate_instances_on_delete = true + on_demand_target_capacity = 0 + allocation_strategy = "priceCapacityOptimized" + on_demand_allocation_strategy = "lowestPrice" + + + launch_template_config { + launch_template_specification { + id = aws_launch_template.spot-template.id + version = "1" + } + overrides { + instance_requirements { + vcpu_count { + min = 4 + max = 4 + } + memory_mib { + min = 8192 + max = 8192 + } + } + } + } +} diff --git a/apps/nodectrl/terraform/aws-spot/variables.tf b/apps/nodectrl/terraform/aws-spot/variables.tf new file mode 100644 index 000000000..33fbfe783 --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/variables.tf @@ -0,0 +1,23 @@ +variable "access_key" { + default = "" +} + +variable "secret_key" { + default = "" +} + +variable "region" { + default = "" +} + +variable "node_id" { + default = "" +} + +variable "keys-path" { + default = "" +} + +variable "account_id" { + default = "" +} diff --git a/apps/nodectrl/terraform/aws/resource.tf b/apps/nodectrl/terraform/aws/resource.tf index 0ad99ab34..736c263a9 100644 --- a/apps/nodectrl/terraform/aws/resource.tf +++ b/apps/nodectrl/terraform/aws/resource.tf @@ -165,7 +165,6 @@ resource "aws_instance" "byoc-node" { } - output "node-ip" { value = aws_instance.byoc-node.public_ip } From df4c730ba1dc3e2ddde191846cc003c81f3f9b4a Mon Sep 17 00:00:00 2001 From: Abdhesh Nayak Date: Mon, 19 Jun 2023 12:48:50 +0530 Subject: [PATCH 2/4] wip --- apps/nodectrl/internal/domain/aws-spot/main.go | 3 --- apps/nodectrl/internal/domain/aws/main.go | 1 - apps/nodectrl/terraform/aws-spot/resource.tf | 4 +++- apps/nodectrl/terraform/aws-spot/variables.tf | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/apps/nodectrl/internal/domain/aws-spot/main.go b/apps/nodectrl/internal/domain/aws-spot/main.go index 0d5c916d3..c3368380c 100644 --- a/apps/nodectrl/internal/domain/aws-spot/main.go +++ b/apps/nodectrl/internal/domain/aws-spot/main.go @@ -31,7 +31,6 @@ type awsSpotClient struct { accessKey string accessSecret string accountName string - accountId string tfTemplates string labels map[string]string @@ -346,7 +345,6 @@ func parseValues(a awsSpotClient, sshPath string) map[string]string { values["region"] = a.node.Region values["node_id"] = a.node.NodeId values["keys-path"] = sshPath - values["account_id"] = a.accountId // TODO: ami according to region // ami is fixed for now @@ -464,7 +462,6 @@ func NewAwsSpotProviderClient(node aws.AWSNode, cpd common.CommonProviderData, a accessKey: apc.AccessKey, accessSecret: apc.AccessSecret, accountName: apc.AccountName, - accountId: apc.AccountId, tfTemplates: cpd.TfTemplates, labels: cpd.Labels, diff --git a/apps/nodectrl/internal/domain/aws/main.go b/apps/nodectrl/internal/domain/aws/main.go index 2e6d53c52..fad834058 100644 --- a/apps/nodectrl/internal/domain/aws/main.go +++ b/apps/nodectrl/internal/domain/aws/main.go @@ -20,7 +20,6 @@ type AwsProviderConfig struct { AccessKey string `yaml:"accessKey"` AccessSecret string `yaml:"accessSecret"` AccountName string `yaml:"accountName"` - AccountId string `yaml:"accountId"` } type AWSNode struct { diff --git a/apps/nodectrl/terraform/aws-spot/resource.tf b/apps/nodectrl/terraform/aws-spot/resource.tf index 9bd67348a..6a104d34f 100644 --- a/apps/nodectrl/terraform/aws-spot/resource.tf +++ b/apps/nodectrl/terraform/aws-spot/resource.tf @@ -19,6 +19,8 @@ output "node-name" { } +data "aws_caller_identity" "current" {} + resource "aws_security_group" "sg" { name = "sg-${var.node_id}" @@ -148,7 +150,7 @@ resource "aws_launch_template" "spot-template" { resource "aws_spot_fleet_request" "byoc-spot-node" { - iam_fleet_role = "arn:aws:iam::${var.account_id}:role/aws-ec2-spot-fleet-tagging-role" + iam_fleet_role = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-ec2-spot-fleet-tagging-role" target_capacity = 1 diff --git a/apps/nodectrl/terraform/aws-spot/variables.tf b/apps/nodectrl/terraform/aws-spot/variables.tf index 33fbfe783..50e52813a 100644 --- a/apps/nodectrl/terraform/aws-spot/variables.tf +++ b/apps/nodectrl/terraform/aws-spot/variables.tf @@ -17,7 +17,3 @@ variable "node_id" { variable "keys-path" { default = "" } - -variable "account_id" { - default = "" -} From 2776552ce250df503edeb6a5d3def68381a2ef6c Mon Sep 17 00:00:00 2001 From: Abdhesh Nayak Date: Mon, 19 Jun 2023 14:40:59 +0530 Subject: [PATCH 3/4] wip --- .../nodectrl/internal/domain/aws-spot/main.go | 470 ------------------ .../internal/domain/aws/add-master.go | 101 ++++ .../internal/domain/aws/add-worker.go | 125 +++++ .../internal/domain/aws/create-cluster.go | 150 ++++++ apps/nodectrl/internal/domain/aws/main.go | 328 +----------- .../internal/domain/provider-client-fx.go | 15 - 6 files changed, 377 insertions(+), 812 deletions(-) delete mode 100644 apps/nodectrl/internal/domain/aws-spot/main.go create mode 100644 apps/nodectrl/internal/domain/aws/add-master.go create mode 100644 apps/nodectrl/internal/domain/aws/add-worker.go create mode 100644 apps/nodectrl/internal/domain/aws/create-cluster.go diff --git a/apps/nodectrl/internal/domain/aws-spot/main.go b/apps/nodectrl/internal/domain/aws-spot/main.go deleted file mode 100644 index c3368380c..000000000 --- a/apps/nodectrl/internal/domain/aws-spot/main.go +++ /dev/null @@ -1,470 +0,0 @@ -package awsspot - -import ( - "context" - "fmt" - "os" - "path" - "strings" - "time" - - "gopkg.in/yaml.v2" - - "kloudlite.io/apps/nodectrl/internal/domain/aws" - "kloudlite.io/apps/nodectrl/internal/domain/common" - "kloudlite.io/apps/nodectrl/internal/domain/utils" - awss3 "kloudlite.io/pkg/aws-s3" -) - -type spotNodeConfig struct { - ServerIP string `yaml:"serverIp"` - Token string `yaml:"token"` - NodeName string `yaml:"nodeName"` - Taints []string `yaml:"taints"` - Labels map[string]string `yaml:"labels"` -} - -type awsSpotClient struct { - node aws.AWSNode - awsS3Client awss3.AwsS3 - - accessKey string - accessSecret string - accountName string - - tfTemplates string - labels map[string]string - taints []string -} - -// AddMaster implements common.ProviderClient. -func (a awsSpotClient) AddMaster(ctx context.Context) error { - // fetch token - sshPath := path.Join("/tmp/ssh", a.accountName) - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { - return err - } - - if _, err := os.Stat(sshPath); err != nil { - if e := os.Mkdir(sshPath, os.ModePerm); e != nil { - return e - } - } - - tokenPath := path.Join(sshPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := aws.TokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - if err := a.writeSpotNode(kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", sshPath), - string(ip), - ), - "checking if node is ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - // attach to cluster as master - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", - sshPath, - string(ip), - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - string(ip), - a.node.NodeId, - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { - return err - } - - return nil -} - -func (a awsSpotClient) AddWorker(ctx context.Context) error { - // fetch token - - sshPath := path.Join("/tmp/ssh", a.accountName) - - if _, err := os.Stat(sshPath); err != nil { - if e := os.Mkdir(sshPath, os.ModePerm); e != nil { - return e - } - } - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { - return err - } - - tokenPath := path.Join(sshPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := aws.TokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - if err := a.writeSpotNode(kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%s/access", sshPath), - string(ip), - ), - "checking if node ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - labels := func() []string { - l := []string{} - for k, v := range map[string]string{ - "kloudlite.io/public-ip": string(ip), - } { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - - for k, v := range a.labels { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - return l - }() - - // attach to cluster as workernode - - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", - sshPath, - ip, - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - ip, - a.node.NodeId, - strings.Join(labels, " "), - func() string { - if a.node.IsGpu { - // return "--docker" - // return "--docker" - return "" - } - return "" - }(), - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { - return err - } - - return nil -} - -func (a awsSpotClient) SetupSSH() error { - const sshDir = "/tmp/ssh" - - if _, err := os.Stat(sshDir); err != nil { - err := os.Mkdir(sshDir, os.ModePerm) - if err != nil { - return err - } - } - - destDir := path.Join(sshDir, a.accountName) - fileName := fmt.Sprintf("%s.zip", a.accountName) - - if err := a.awsS3Client.IsFileExists(fileName); err != nil { - - if _, err := os.Stat(destDir); err == nil { - if err := os.RemoveAll(destDir); err != nil { - return err - } - } - - if e := os.Mkdir(destDir, os.ModePerm); e != nil { - return e - } - - privateKeyBytes, publicKeyBytes, err := utils.GenerateKeys() - if err != nil { - return err - } - - if err := os.WriteFile(fmt.Sprintf("%s/access.pub", destDir), publicKeyBytes, os.ModePerm); err != nil { - return err - } - - if err := os.WriteFile(fmt.Sprintf("%s/access", destDir), privateKeyBytes, 0400); err != nil { - return err - } - return nil - } - - if err := os.RemoveAll(destDir); err != nil { - return err - } - - err := a.awsS3Client.DownloadFile(path.Join(sshDir, fileName), fileName) - if err != nil { - return err - } - - _, err = utils.Unzip(path.Join(sshDir, fileName), sshDir) - if err != nil { - return err - } - - return nil -} - -func (a awsSpotClient) saveForSure() error { - count := 0 - for { - if err := a.saveSSH(); err == nil { - return nil - } - if count >= 10 { - return fmt.Errorf("coudn't save the state") - } - - time.Sleep(time.Second * 20) - count++ - } -} - -func (a awsSpotClient) saveSSH() error { - const sshDir = "/tmp/ssh" - destDir := path.Join(sshDir, a.accountName) - fileName := fmt.Sprintf("%s.zip", a.accountName) - - if err := utils.ZipSource(destDir, path.Join(sshDir, fileName)); err != nil { - return err - } - - if err := a.awsS3Client.UploadFile(path.Join(sshDir, fileName), fileName); err != nil { - return err - } - - return nil -} - -// CreateCluster implements common.ProviderClient -func (a awsSpotClient) CreateCluster(ctx context.Context) error { - return fmt.Errorf("you can't create cluster using aws spot for now") -} - -func parseValues(a awsSpotClient, sshPath string) map[string]string { - values := map[string]string{} - - values["access_key"] = a.accessKey - values["secret_key"] = a.accessSecret - - values["region"] = a.node.Region - values["node_id"] = a.node.NodeId - values["keys-path"] = sshPath - - // TODO: ami according to region - // ami is fixed for now - // values["ami"] = a.node.ImageId - - return values -} - -func (a awsSpotClient) SaveToDbGuranteed(ctx context.Context) { - for { - if err := utils.SaveToDb(a.node.NodeId, a.awsS3Client); err == nil { - break - } else { - fmt.Println(err) - } - time.Sleep(time.Second * 20) - } -} - -func (a awsSpotClient) writeSpotNode(kc aws.TokenAndKubeconfig) error { - const sshDir = "/tmp/ssh" - sshPath := path.Join(sshDir, a.accountName) - dataPath := path.Join(sshPath, "data.yaml") - - nConfig := spotNodeConfig{ - ServerIP: kc.ServerIp, - Token: kc.Token, - NodeName: a.node.NodeId, - Taints: []string{}, - Labels: map[string]string{}, - } - - out, err := yaml.Marshal(nConfig) - if err != nil { - return err - } - - return os.WriteFile(dataPath, out, os.ModePerm) -} - -// NewNode implements ProviderClient -func (a awsSpotClient) NewNode(ctx context.Context) error { - sshPath := path.Join("/tmp/ssh", a.accountName) - - values := parseValues(a, sshPath) - - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws-spot"), a.awsS3Client, true); err != nil { - return err - } - - defer a.SaveToDbGuranteed(ctx) - - // upload the final state to the db, upsert if db is already present - - // apply the tf file - if err := func() error { - if err := utils.InitTFdir(path.Join(utils.Workdir, a.node.NodeId)); err != nil { - return err - } - - if err := utils.ApplyTF(path.Join(utils.Workdir, a.node.NodeId), values); err != nil { - return err - } - - return nil - }(); err != nil { - return err - } - - return nil -} - -// DeleteNode implements ProviderClient -func (a awsSpotClient) DeleteNode(ctx context.Context) error { - sshPath := path.Join("/tmp/ssh", a.accountName) - values := parseValues(a, sshPath) - - /* - steps: - - check if state present in db - - if present load that to working dir - - else initialize new tf dir - - destroy node with terraform - - delete final state - */ - - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws-spot"), a.awsS3Client, false); err != nil { - return err - } - - // destroy the tf file - if err := func() error { - if err := utils.DestroyNode(a.node.NodeId, values); err != nil { - return err - } - - return nil - }(); err != nil { - return err - } - - return nil -} - -func NewAwsSpotProviderClient(node aws.AWSNode, cpd common.CommonProviderData, apc aws.AwsProviderConfig) (common.ProviderClient, error) { - awsS3Client, err := awss3.NewAwsS3Client(apc.AccessKey, apc.AccessSecret, apc.AccountName) - if err != nil { - return nil, err - } - - return awsSpotClient{ - node: node, - awsS3Client: awsS3Client, - - accessKey: apc.AccessKey, - accessSecret: apc.AccessSecret, - accountName: apc.AccountName, - - tfTemplates: cpd.TfTemplates, - labels: cpd.Labels, - taints: cpd.Taints, - }, nil -} diff --git a/apps/nodectrl/internal/domain/aws/add-master.go b/apps/nodectrl/internal/domain/aws/add-master.go new file mode 100644 index 000000000..9ec4cca8c --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/add-master.go @@ -0,0 +1,101 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "strings" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +// AddMaster implements common.ProviderClient. +func (a AwsClient) AddMaster(ctx context.Context) error { + // fetch token + sshPath := path.Join("/tmp/ssh", a.accountName) + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { + return e + } + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + // create node and wait for ready + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%v/access", sshPath), + string(ip), + ), + "checking if node is ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + // attach to cluster as master + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", + sshPath, + string(ip), + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + string(ip), + a.node.NodeId, + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { + return err + } + + return nil +} diff --git a/apps/nodectrl/internal/domain/aws/add-worker.go b/apps/nodectrl/internal/domain/aws/add-worker.go new file mode 100644 index 000000000..07048e82f --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/add-worker.go @@ -0,0 +1,125 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "strings" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +func (a AwsClient) AddWorker(ctx context.Context) error { + // fetch token + + sshPath := path.Join("/tmp/ssh", a.accountName) + + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { + return e + } + } + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + // create node and wait for ready + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%s/access", sshPath), + string(ip), + ), + "checking if node ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + labels := func() []string { + l := []string{} + for k, v := range map[string]string{ + "kloudlite.io/public-ip": string(ip), + } { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + + for k, v := range a.labels { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + return l + }() + + // attach to cluster as workernode + + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", + sshPath, + ip, + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + ip, + a.node.NodeId, + strings.Join(labels, " "), + func() string { + if a.node.IsGpu { + // return "--docker" + // return "--docker" + return "" + } + return "" + }(), + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { + return err + } + + return nil +} diff --git a/apps/nodectrl/internal/domain/aws/create-cluster.go b/apps/nodectrl/internal/domain/aws/create-cluster.go new file mode 100644 index 000000000..20535e983 --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/create-cluster.go @@ -0,0 +1,150 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/common" + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +// CreateCluster implements common.ProviderClient +func (a AwsClient) CreateCluster(ctx context.Context) error { + /* + create node + check for rediness + install k3s + check for rediness + install maaster + */ + + if err := func() error { + switch a.node.NodeType { + case "ec2": + return nil + default: + return fmt.Errorf("this type of node is not supported for now (%q)", a.node.NodeType) + } + }(); err != nil { + return err + } + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + sshPath := path.Join("/tmp/ssh", a.accountName) + + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%v/access", sshPath), + string(ip), + ), + "checking is node is ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + masterToken := guuid.New() + + // install k3s + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --token=%s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s --cluster-init", + sshPath, + string(ip), + masterToken.String(), + string(ip), + a.node.NodeId, + ) + + if err := utils.ExecCmd(cmd, "installing k3s"); err != nil { + return err + } + // needed to fetch kubeconfig + + configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", sshPath, string(ip)), "fetching kubeconfig from the cluster") + if err != nil { + return err + } + + var kubeconfig common.KubeConfigType + if err := yaml.Unmarshal(configOut, &kubeconfig); err != nil { + return err + } + + for i := range kubeconfig.Clusters { + kubeconfig.Clusters[i].Cluster.Server = fmt.Sprintf("https://%s:6443", string(ip)) + } + + kc, err := yaml.Marshal(kubeconfig) + if err != nil { + return err + } + + tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", sshPath, string(ip)), "fetching node token from the cluster") + if err != nil { + return err + } + + st := TokenAndKubeconfig{ + Token: string(tokenOut), + Kubeconfig: string(kc), + ServerIp: string(ip), + MasterToken: masterToken.String(), + } + + b, err := yaml.Marshal(st) + if err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + + if err := os.WriteFile(tokenPath, b, os.ModePerm); err != nil { + return err + } + + if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountName)); err != nil { + return err + } + + return err +} + +func parseValues(a AwsClient, sshPath string) map[string]string { + values := map[string]string{} + + values["access_key"] = a.accessKey + values["secret_key"] = a.accessSecret + + values["region"] = a.node.Region + values["node_id"] = a.node.NodeId + values["instance_type"] = a.node.InstanceType + values["keys-path"] = sshPath + values["ami"] = a.node.ImageId + + return values +} diff --git a/apps/nodectrl/internal/domain/aws/main.go b/apps/nodectrl/internal/domain/aws/main.go index fad834058..54545a585 100644 --- a/apps/nodectrl/internal/domain/aws/main.go +++ b/apps/nodectrl/internal/domain/aws/main.go @@ -5,12 +5,8 @@ import ( "fmt" "os" "path" - "strings" "time" - guuid "github.com/google/uuid" - "gopkg.in/yaml.v2" - "kloudlite.io/apps/nodectrl/internal/domain/common" "kloudlite.io/apps/nodectrl/internal/domain/utils" awss3 "kloudlite.io/pkg/aws-s3" @@ -29,6 +25,7 @@ type AWSNode struct { VPC string `yaml:"vpc"` ImageId string `yaml:"imageId"` IsGpu bool `yaml:"isGpu"` + NodeType string `yaml:"nodeType" json:"nodeType"` } type AwsClient struct { @@ -52,204 +49,6 @@ type TokenAndKubeconfig struct { MasterToken string `json:"masterToken"` } -// AddMaster implements common.ProviderClient. -func (a AwsClient) AddMaster(ctx context.Context) error { - // fetch token - sshPath := path.Join("/tmp/ssh", a.accountName) - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { - return err - } - - if _, err := os.Stat(sshPath); err != nil { - if e := os.Mkdir(sshPath, os.ModePerm); e != nil { - return e - } - } - - tokenPath := path.Join(sshPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := TokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", sshPath), - string(ip), - ), - "checking if node is ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - // attach to cluster as master - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", - sshPath, - string(ip), - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - string(ip), - a.node.NodeId, - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { - return err - } - - return nil -} - -func (a AwsClient) AddWorker(ctx context.Context) error { - // fetch token - - sshPath := path.Join("/tmp/ssh", a.accountName) - - if _, err := os.Stat(sshPath); err != nil { - if e := os.Mkdir(sshPath, os.ModePerm); e != nil { - return e - } - } - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { - return err - } - - tokenPath := path.Join(sshPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := TokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%s/access", sshPath), - string(ip), - ), - "checking if node ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - labels := func() []string { - l := []string{} - for k, v := range map[string]string{ - "kloudlite.io/public-ip": string(ip), - } { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - - for k, v := range a.labels { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - return l - }() - - // attach to cluster as workernode - - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", - sshPath, - ip, - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - ip, - a.node.NodeId, - strings.Join(labels, " "), - func() string { - if a.node.IsGpu { - // return "--docker" - // return "--docker" - return "" - } - return "" - }(), - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { - return err - } - - return nil -} - func (a AwsClient) SetupSSH() error { const sshDir = "/tmp/ssh" @@ -335,131 +134,6 @@ func (a AwsClient) saveSSH() error { return nil } -// CreateCluster implements common.ProviderClient -func (a AwsClient) CreateCluster(ctx context.Context) error { - /* - create node - check for rediness - install k3s - check for rediness - install maaster - */ - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - sshPath := path.Join("/tmp/ssh", a.accountName) - - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", sshPath), - string(ip), - ), - "checking is node is ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - masterToken := guuid.New() - - // install k3s - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --token=%s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s --cluster-init", - sshPath, - string(ip), - masterToken.String(), - string(ip), - a.node.NodeId, - ) - - if err := utils.ExecCmd(cmd, "installing k3s"); err != nil { - return err - } - // needed to fetch kubeconfig - - configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", sshPath, string(ip)), "fetching kubeconfig from the cluster") - if err != nil { - return err - } - - var kubeconfig common.KubeConfigType - if err := yaml.Unmarshal(configOut, &kubeconfig); err != nil { - return err - } - - for i := range kubeconfig.Clusters { - kubeconfig.Clusters[i].Cluster.Server = fmt.Sprintf("https://%s:6443", string(ip)) - } - - kc, err := yaml.Marshal(kubeconfig) - if err != nil { - return err - } - - tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", sshPath, string(ip)), "fetching node token from the cluster") - if err != nil { - return err - } - - st := TokenAndKubeconfig{ - Token: string(tokenOut), - Kubeconfig: string(kc), - ServerIp: string(ip), - MasterToken: masterToken.String(), - } - - b, err := yaml.Marshal(st) - if err != nil { - return err - } - - tokenPath := path.Join(sshPath, "config.yaml") - - if err := os.WriteFile(tokenPath, b, os.ModePerm); err != nil { - return err - } - - if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountName)); err != nil { - return err - } - - return err -} - -func parseValues(a AwsClient, sshPath string) map[string]string { - values := map[string]string{} - - values["access_key"] = a.accessKey - values["secret_key"] = a.accessSecret - - values["region"] = a.node.Region - values["node_id"] = a.node.NodeId - values["instance_type"] = a.node.InstanceType - values["keys-path"] = sshPath - values["ami"] = a.node.ImageId - - return values -} - func (a AwsClient) SaveToDbGuranteed(ctx context.Context) { for { if err := utils.SaveToDb(a.node.NodeId, a.awsS3Client); err == nil { diff --git a/apps/nodectrl/internal/domain/provider-client-fx.go b/apps/nodectrl/internal/domain/provider-client-fx.go index 8df2c5085..54879ee20 100644 --- a/apps/nodectrl/internal/domain/provider-client-fx.go +++ b/apps/nodectrl/internal/domain/provider-client-fx.go @@ -46,21 +46,6 @@ var ProviderClientFx = fx.Module("provider-client-fx", return aws.NewAwsProviderClient(node, cpd, apc) - case "aws-spot": - - node := awsspot.AwsASpotNode{} - - if err := utils.Base64YamlDecode(env.NodeConfig, &node); err != nil { - return nil, err - } - - apc := aws.AwsProviderConfig{} - - if err := utils.Base64YamlDecode(env.AWSProviderConfig, &apc); err != nil { - return nil, err - } - - return awsspot.NewAwsSpotProviderClient(node, cpd, apc) case "azure": panic("not implemented") case "do": From c1f9c078bbe7d2f5a90f293c4d1158abb94c4b6c Mon Sep 17 00:00:00 2001 From: Abdhesh Nayak Date: Mon, 19 Jun 2023 16:11:40 +0530 Subject: [PATCH 4/4] wip --- .../internal/domain/aws/add-master.go | 28 ++++--- .../internal/domain/aws/add-worker.go | 10 +++ .../internal/domain/aws/create-cluster.go | 10 ++- apps/nodectrl/internal/domain/aws/main.go | 82 ++++++++++++++++--- 4 files changed, 106 insertions(+), 24 deletions(-) diff --git a/apps/nodectrl/internal/domain/aws/add-master.go b/apps/nodectrl/internal/domain/aws/add-master.go index 9ec4cca8c..893b63cb3 100644 --- a/apps/nodectrl/internal/domain/aws/add-master.go +++ b/apps/nodectrl/internal/domain/aws/add-master.go @@ -13,8 +13,25 @@ import ( "kloudlite.io/apps/nodectrl/internal/domain/utils" ) +func (a AwsClient) ensureForMasters() error { + switch a.node.NodeType { + case "spot": + return fmt.Errorf("spot is not supported as a master") + default: + return nil + } +} + // AddMaster implements common.ProviderClient. func (a AwsClient) AddMaster(ctx context.Context) error { + if err := a.ensureForMasters(); err != nil { + return err + } + + if err := a.ensurePaths(); err != nil { + return err + } + // fetch token sshPath := path.Join("/tmp/ssh", a.accountName) @@ -24,12 +41,6 @@ func (a AwsClient) AddMaster(ctx context.Context) error { return err } - if _, err := os.Stat(sshPath); err != nil { - if e := os.Mkdir(sshPath, os.ModePerm); e != nil { - return e - } - } - tokenPath := path.Join(sshPath, "config.yaml") if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { return err @@ -53,11 +64,6 @@ func (a AwsClient) AddMaster(ctx context.Context) error { } defer a.saveForSure() - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") if err != nil { return err diff --git a/apps/nodectrl/internal/domain/aws/add-worker.go b/apps/nodectrl/internal/domain/aws/add-worker.go index 07048e82f..7548f661f 100644 --- a/apps/nodectrl/internal/domain/aws/add-worker.go +++ b/apps/nodectrl/internal/domain/aws/add-worker.go @@ -46,6 +46,12 @@ func (a AwsClient) AddWorker(ctx context.Context) error { return err } + if a.node.NodeType == "spot" { + if err := a.writeNodeConfig(kc); err != nil { + return err + } + } + // setup ssh if err := a.SetupSSH(); err != nil { @@ -58,6 +64,10 @@ func (a AwsClient) AddWorker(ctx context.Context) error { return err } + if a.node.NodeType == "spot" { + return nil + } + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") if err != nil { return err diff --git a/apps/nodectrl/internal/domain/aws/create-cluster.go b/apps/nodectrl/internal/domain/aws/create-cluster.go index 20535e983..2cd8648a9 100644 --- a/apps/nodectrl/internal/domain/aws/create-cluster.go +++ b/apps/nodectrl/internal/domain/aws/create-cluster.go @@ -7,6 +7,7 @@ import ( "path" "time" + guuid "github.com/google/uuid" "gopkg.in/yaml.v2" "kloudlite.io/apps/nodectrl/internal/domain/common" @@ -22,13 +23,16 @@ func (a AwsClient) CreateCluster(ctx context.Context) error { check for rediness install maaster */ + if err := a.ensureForMasters(); err != nil { + return err + } if err := func() error { switch a.node.NodeType { - case "ec2": - return nil + case "spot": + return fmt.Errorf("spot is not supported as a master") default: - return fmt.Errorf("this type of node is not supported for now (%q)", a.node.NodeType) + return nil } }(); err != nil { return err diff --git a/apps/nodectrl/internal/domain/aws/main.go b/apps/nodectrl/internal/domain/aws/main.go index 54545a585..1732ff935 100644 --- a/apps/nodectrl/internal/domain/aws/main.go +++ b/apps/nodectrl/internal/domain/aws/main.go @@ -7,6 +7,8 @@ import ( "path" "time" + "gopkg.in/yaml.v2" + "kloudlite.io/apps/nodectrl/internal/domain/common" "kloudlite.io/apps/nodectrl/internal/domain/utils" awss3 "kloudlite.io/pkg/aws-s3" @@ -49,11 +51,61 @@ type TokenAndKubeconfig struct { MasterToken string `json:"masterToken"` } -func (a AwsClient) SetupSSH() error { - const sshDir = "/tmp/ssh" +type NodeConfig struct { + ServerIP string `yaml:"serverIp"` + Token string `yaml:"token"` + NodeName string `yaml:"nodeName"` + Taints []string `yaml:"taints"` + Labels map[string]string `yaml:"labels"` +} +func (a AwsClient) ensurePaths() error { + const sshDir = "/tmp/ssh" + sshPath := path.Join(sshDir, a.accountName) if _, err := os.Stat(sshDir); err != nil { - return os.Mkdir(sshDir, os.ModePerm) + if err := os.Mkdir(sshDir, os.ModePerm); err != nil { + return err + } + } + + if _, err := os.Stat(sshPath); err != nil { + if err := os.Mkdir(sshPath, os.ModePerm); err != nil { + return err + } + } + + return nil +} + +func (a AwsClient) writeNodeConfig(kc TokenAndKubeconfig) error { + if err := a.ensurePaths(); err != nil { + return err + } + + const sshDir = "/tmp/ssh" + sshPath := path.Join(sshDir, a.accountName) + dataPath := path.Join(sshPath, "data.yaml") + + nc := NodeConfig{ + ServerIP: kc.ServerIp, + Token: kc.Token, + NodeName: a.node.NodeId, + Taints: []string{}, + Labels: map[string]string{}, + } + + out, err := yaml.Marshal(nc) + if err != nil { + return err + } + + return os.WriteFile(dataPath, out, os.ModePerm) +} + +func (a AwsClient) SetupSSH() error { + const sshDir = "/tmp/ssh" + if err := a.ensurePaths(); err != nil { + return err } destDir := path.Join(sshDir, a.accountName) @@ -145,19 +197,29 @@ func (a AwsClient) SaveToDbGuranteed(ctx context.Context) { } } +func (a AwsClient) getAwsTemplatePath() string { + return path.Join(a.tfTemplates, func() string { + switch a.node.NodeType { + case "spot": + return "aws-spot" + default: + return "aws" + } + }(), + ) +} + // NewNode implements ProviderClient func (a AwsClient) NewNode(ctx context.Context) error { sshPath := path.Join("/tmp/ssh", a.accountName) values := parseValues(a, sshPath) - if true { - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws"), a.awsS3Client, true); err != nil { - return err - } - - defer a.SaveToDbGuranteed(ctx) + if err := utils.MakeTfWorkFileReady(a.node.NodeId, a.getAwsTemplatePath(), a.awsS3Client, true); err != nil { + return err } + defer a.SaveToDbGuranteed(ctx) + // upload the final state to the db, upsert if db is already present // apply the tf file @@ -192,7 +254,7 @@ func (a AwsClient) DeleteNode(ctx context.Context) error { - delete final state */ - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws"), a.awsS3Client, false); err != nil { + if err := utils.MakeTfWorkFileReady(a.node.NodeId, a.getAwsTemplatePath(), a.awsS3Client, false); err != nil { return err }