diff --git a/apps/nodectrl/internal/domain/aws/add-master.go b/apps/nodectrl/internal/domain/aws/add-master.go new file mode 100644 index 000000000..893b63cb3 --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/add-master.go @@ -0,0 +1,107 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "strings" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +func (a AwsClient) ensureForMasters() error { + switch a.node.NodeType { + case "spot": + return fmt.Errorf("spot is not supported as a master") + default: + return nil + } +} + +// AddMaster implements common.ProviderClient. +func (a AwsClient) AddMaster(ctx context.Context) error { + if err := a.ensureForMasters(); err != nil { + return err + } + + if err := a.ensurePaths(); err != nil { + return err + } + + // fetch token + sshPath := path.Join("/tmp/ssh", a.accountName) + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%v/access", sshPath), + string(ip), + ), + "checking if node is ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + // attach to cluster as master + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", + sshPath, + string(ip), + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + string(ip), + a.node.NodeId, + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { + return err + } + + return nil +} diff --git a/apps/nodectrl/internal/domain/aws/add-worker.go b/apps/nodectrl/internal/domain/aws/add-worker.go new file mode 100644 index 000000000..7548f661f --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/add-worker.go @@ -0,0 +1,135 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "strings" + "time" + + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +func (a AwsClient) AddWorker(ctx context.Context) error { + // fetch token + + sshPath := path.Join("/tmp/ssh", a.accountName) + + if _, err := os.Stat(sshPath); err != nil { + if e := os.Mkdir(sshPath, os.ModePerm); e != nil { + return e + } + } + + tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountName) + + if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { + return err + } + + b, err := os.ReadFile(tokenPath) + if err != nil { + return err + } + + kc := TokenAndKubeconfig{} + + if err := yaml.Unmarshal(b, &kc); err != nil { + return err + } + + if a.node.NodeType == "spot" { + if err := a.writeNodeConfig(kc); err != nil { + return err + } + } + + // setup ssh + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + + // create node and wait for ready + if err := a.NewNode(ctx); err != nil { + return err + } + + if a.node.NodeType == "spot" { + return nil + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%s/access", sshPath), + string(ip), + ), + "checking if node ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + labels := func() []string { + l := []string{} + for k, v := range map[string]string{ + "kloudlite.io/public-ip": string(ip), + } { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + + for k, v := range a.labels { + l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) + } + return l + }() + + // attach to cluster as workernode + + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", + sshPath, + ip, + kc.ServerIp, + strings.TrimSpace(string(kc.Token)), + ip, + a.node.NodeId, + strings.Join(labels, " "), + func() string { + if a.node.IsGpu { + // return "--docker" + // return "--docker" + return "" + } + return "" + }(), + ) + + if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { + return err + } + + return nil +} diff --git a/apps/nodectrl/internal/domain/aws/create-cluster.go b/apps/nodectrl/internal/domain/aws/create-cluster.go new file mode 100644 index 000000000..2cd8648a9 --- /dev/null +++ b/apps/nodectrl/internal/domain/aws/create-cluster.go @@ -0,0 +1,154 @@ +package aws + +import ( + "context" + "fmt" + "os" + "path" + "time" + + guuid "github.com/google/uuid" + "gopkg.in/yaml.v2" + + "kloudlite.io/apps/nodectrl/internal/domain/common" + "kloudlite.io/apps/nodectrl/internal/domain/utils" +) + +// CreateCluster implements common.ProviderClient +func (a AwsClient) CreateCluster(ctx context.Context) error { + /* + create node + check for rediness + install k3s + check for rediness + install maaster + */ + if err := a.ensureForMasters(); err != nil { + return err + } + + if err := func() error { + switch a.node.NodeType { + case "spot": + return fmt.Errorf("spot is not supported as a master") + default: + return nil + } + }(); err != nil { + return err + } + + if err := a.SetupSSH(); err != nil { + return err + } + defer a.saveForSure() + sshPath := path.Join("/tmp/ssh", a.accountName) + + if err := a.NewNode(ctx); err != nil { + return err + } + + ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + if err != nil { + return err + } + + count := 0 + + for { + if e := utils.ExecCmd( + fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", + fmt.Sprintf("%v/access", sshPath), + string(ip), + ), + "checking is node is ready"); e == nil { + break + } + + count++ + if count > 24 { + return fmt.Errorf("node is not ready even after 6 minutes") + } + time.Sleep(time.Second * 5) + } + + masterToken := guuid.New() + + // install k3s + cmd := fmt.Sprintf( + "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --token=%s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s --cluster-init", + sshPath, + string(ip), + masterToken.String(), + string(ip), + a.node.NodeId, + ) + + if err := utils.ExecCmd(cmd, "installing k3s"); err != nil { + return err + } + // needed to fetch kubeconfig + + configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", sshPath, string(ip)), "fetching kubeconfig from the cluster") + if err != nil { + return err + } + + var kubeconfig common.KubeConfigType + if err := yaml.Unmarshal(configOut, &kubeconfig); err != nil { + return err + } + + for i := range kubeconfig.Clusters { + kubeconfig.Clusters[i].Cluster.Server = fmt.Sprintf("https://%s:6443", string(ip)) + } + + kc, err := yaml.Marshal(kubeconfig) + if err != nil { + return err + } + + tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", sshPath, string(ip)), "fetching node token from the cluster") + if err != nil { + return err + } + + st := TokenAndKubeconfig{ + Token: string(tokenOut), + Kubeconfig: string(kc), + ServerIp: string(ip), + MasterToken: masterToken.String(), + } + + b, err := yaml.Marshal(st) + if err != nil { + return err + } + + tokenPath := path.Join(sshPath, "config.yaml") + + if err := os.WriteFile(tokenPath, b, os.ModePerm); err != nil { + return err + } + + if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountName)); err != nil { + return err + } + + return err +} + +func parseValues(a AwsClient, sshPath string) map[string]string { + values := map[string]string{} + + values["access_key"] = a.accessKey + values["secret_key"] = a.accessSecret + + values["region"] = a.node.Region + values["node_id"] = a.node.NodeId + values["instance_type"] = a.node.InstanceType + values["keys-path"] = sshPath + values["ami"] = a.node.ImageId + + return values +} diff --git a/apps/nodectrl/internal/domain/aws/main.go b/apps/nodectrl/internal/domain/aws/main.go index fbdc2e17c..1732ff935 100644 --- a/apps/nodectrl/internal/domain/aws/main.go +++ b/apps/nodectrl/internal/domain/aws/main.go @@ -5,10 +5,8 @@ import ( "fmt" "os" "path" - "strings" "time" - guuid "github.com/google/uuid" "gopkg.in/yaml.v2" "kloudlite.io/apps/nodectrl/internal/domain/common" @@ -29,236 +27,89 @@ type AWSNode struct { VPC string `yaml:"vpc"` ImageId string `yaml:"imageId"` IsGpu bool `yaml:"isGpu"` + NodeType string `yaml:"nodeType" json:"nodeType"` } -type awsClient struct { +type AwsClient struct { node AWSNode awsS3Client awss3.AwsS3 accessKey string accessSecret string - accountId string + accountName string - SSHPath string + // SSHPath string tfTemplates string labels map[string]string taints []string } -type tokenAndKubeconfig struct { +type TokenAndKubeconfig struct { Token string `json:"token"` Kubeconfig string `json:"kubeconfig"` ServerIp string `json:"serverIp"` MasterToken string `json:"masterToken"` } -// AddMaster implements common.ProviderClient. -func (a awsClient) AddMaster(ctx context.Context) error { - // fetch token - a.SSHPath = path.Join("/tmp/ssh", a.accountId) - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountId) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { - return err - } +type NodeConfig struct { + ServerIP string `yaml:"serverIp"` + Token string `yaml:"token"` + NodeName string `yaml:"nodeName"` + Taints []string `yaml:"taints"` + Labels map[string]string `yaml:"labels"` +} - if _, err := os.Stat(a.SSHPath); err != nil { - if e := os.Mkdir(a.SSHPath, os.ModePerm); e != nil { - return e +func (a AwsClient) ensurePaths() error { + const sshDir = "/tmp/ssh" + sshPath := path.Join(sshDir, a.accountName) + if _, err := os.Stat(sshDir); err != nil { + if err := os.Mkdir(sshDir, os.ModePerm); err != nil { + return err } } - tokenPath := path.Join(a.SSHPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := tokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", a.SSHPath), - string(ip), - ), - "checking if node is ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") + if _, err := os.Stat(sshPath); err != nil { + if err := os.Mkdir(sshPath, os.ModePerm); err != nil { + return err } - time.Sleep(time.Second * 5) - } - - // attach to cluster as master - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --server https://%s:6443 --token %s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s", - a.SSHPath, - string(ip), - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - string(ip), - a.node.NodeId, - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a master"); err != nil { - return err } return nil } -func (a awsClient) AddWorker(ctx context.Context) error { - // fetch token - - a.SSHPath = path.Join("/tmp/ssh", a.accountId) - - if _, err := os.Stat(a.SSHPath); err != nil { - if e := os.Mkdir(a.SSHPath, os.ModePerm); e != nil { - return e - } - } - - tokenFileName := fmt.Sprintf("%s-config.yaml", a.accountId) - - if err := a.awsS3Client.IsFileExists(tokenFileName); err != nil { +func (a AwsClient) writeNodeConfig(kc TokenAndKubeconfig) error { + if err := a.ensurePaths(); err != nil { return err } - tokenPath := path.Join(a.SSHPath, "config.yaml") - if err := a.awsS3Client.DownloadFile(tokenPath, tokenFileName); err != nil { - return err - } - - b, err := os.ReadFile(tokenPath) - if err != nil { - return err - } - - kc := tokenAndKubeconfig{} - - if err := yaml.Unmarshal(b, &kc); err != nil { - return err - } - - // setup ssh - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() + const sshDir = "/tmp/ssh" + sshPath := path.Join(sshDir, a.accountName) + dataPath := path.Join(sshPath, "data.yaml") - // create node and wait for ready - if err := a.NewNode(ctx); err != nil { - return err + nc := NodeConfig{ + ServerIP: kc.ServerIp, + Token: kc.Token, + NodeName: a.node.NodeId, + Taints: []string{}, + Labels: map[string]string{}, } - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") + out, err := yaml.Marshal(nc) if err != nil { return err } - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%s/access", a.SSHPath), - string(ip), - ), - "checking if node ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - labels := func() []string { - l := []string{} - for k, v := range map[string]string{ - "kloudlite.io/public-ip": string(ip), - } { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - - for k, v := range a.labels { - l = append(l, fmt.Sprintf("--node-label %s=%s", k, v)) - } - return l - }() - - // attach to cluster as workernode - - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh agent --server https://%s:6443 --token=%s --node-external-ip %s --node-name %s %s %s", - a.SSHPath, - ip, - kc.ServerIp, - strings.TrimSpace(string(kc.Token)), - ip, - a.node.NodeId, - strings.Join(labels, " "), - func() string { - if a.node.IsGpu { - // return "--docker" - // return "--docker" - return "" - } - return "" - }(), - ) - - if err := utils.ExecCmd(cmd, "attaching to cluster as a worker node"); err != nil { - return err - } - - return nil + return os.WriteFile(dataPath, out, os.ModePerm) } -func (a awsClient) SetupSSH() error { +func (a AwsClient) SetupSSH() error { const sshDir = "/tmp/ssh" - - if _, err := os.Stat(sshDir); err != nil { - return os.Mkdir(sshDir, os.ModePerm) + if err := a.ensurePaths(); err != nil { + return err } - destDir := path.Join(sshDir, a.accountId) - fileName := fmt.Sprintf("%s.zip", a.accountId) + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) if err := a.awsS3Client.IsFileExists(fileName); err != nil { @@ -304,7 +155,7 @@ func (a awsClient) SetupSSH() error { return nil } -func (a awsClient) saveForSure() error { +func (a AwsClient) saveForSure() error { count := 0 for { if err := a.saveSSH(); err == nil { @@ -319,10 +170,10 @@ func (a awsClient) saveForSure() error { } } -func (a awsClient) saveSSH() error { +func (a AwsClient) saveSSH() error { const sshDir = "/tmp/ssh" - destDir := path.Join(sshDir, a.accountId) - fileName := fmt.Sprintf("%s.zip", a.accountId) + destDir := path.Join(sshDir, a.accountName) + fileName := fmt.Sprintf("%s.zip", a.accountName) if err := utils.ZipSource(destDir, path.Join(sshDir, fileName)); err != nil { return err @@ -335,132 +186,7 @@ func (a awsClient) saveSSH() error { return nil } -// CreateCluster implements common.ProviderClient -func (a awsClient) CreateCluster(ctx context.Context) error { - /* - create node - check for rediness - install k3s - check for rediness - install maaster - */ - - if err := a.SetupSSH(); err != nil { - return err - } - defer a.saveForSure() - a.SSHPath = path.Join("/tmp/ssh", a.accountId) - - if err := a.NewNode(ctx); err != nil { - return err - } - - ip, err := utils.GetOutput(path.Join(utils.Workdir, a.node.NodeId), "node-ip") - if err != nil { - return err - } - - count := 0 - - for { - if e := utils.ExecCmd( - fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s root@%s ls", - fmt.Sprintf("%v/access", a.SSHPath), - string(ip), - ), - "checking is node is ready"); e == nil { - break - } - - count++ - if count > 24 { - return fmt.Errorf("node is not ready even after 6 minutes") - } - time.Sleep(time.Second * 5) - } - - masterToken := guuid.New() - - // install k3s - cmd := fmt.Sprintf( - "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s sudo sh /tmp/k3s-install.sh server --token=%s --node-external-ip %s --flannel-backend wireguard-native --flannel-external-ip --disable traefik --node-name=%s --cluster-init", - a.SSHPath, - string(ip), - masterToken.String(), - string(ip), - a.node.NodeId, - ) - - if err := utils.ExecCmd(cmd, "installing k3s"); err != nil { - return err - } - // needed to fetch kubeconfig - - configOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /etc/rancher/k3s/k3s.yaml", a.SSHPath, string(ip)), "fetching kubeconfig from the cluster") - if err != nil { - return err - } - - var kubeconfig common.KubeConfigType - if err := yaml.Unmarshal(configOut, &kubeconfig); err != nil { - return err - } - - for i := range kubeconfig.Clusters { - kubeconfig.Clusters[i].Cluster.Server = fmt.Sprintf("https://%s:6443", string(ip)) - } - - kc, err := yaml.Marshal(kubeconfig) - if err != nil { - return err - } - - tokenOut, err := utils.ExecCmdWithOutput(fmt.Sprintf("ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i %s/access root@%s cat /var/lib/rancher/k3s/server/node-token", a.SSHPath, string(ip)), "fetching node token from the cluster") - if err != nil { - return err - } - - st := tokenAndKubeconfig{ - Token: string(tokenOut), - Kubeconfig: string(kc), - ServerIp: string(ip), - MasterToken: masterToken.String(), - } - - b, err := yaml.Marshal(st) - if err != nil { - return err - } - - tokenPath := path.Join(a.SSHPath, "config.yaml") - - if err := os.WriteFile(tokenPath, b, os.ModePerm); err != nil { - return err - } - - if err := a.awsS3Client.UploadFile(tokenPath, fmt.Sprintf("%s-config.yaml", a.accountId)); err != nil { - return err - } - - return err -} - -func parseValues(a awsClient) map[string]string { - values := map[string]string{} - - values["access_key"] = a.accessKey - values["secret_key"] = a.accessSecret - - values["region"] = a.node.Region - values["node_id"] = a.node.NodeId - values["instance_type"] = a.node.InstanceType - values["keys-path"] = a.SSHPath - values["ami"] = a.node.ImageId - - return values -} - -func (a awsClient) SaveToDbGuranteed(ctx context.Context) { +func (a AwsClient) SaveToDbGuranteed(ctx context.Context) { for { if err := utils.SaveToDb(a.node.NodeId, a.awsS3Client); err == nil { break @@ -471,18 +197,29 @@ func (a awsClient) SaveToDbGuranteed(ctx context.Context) { } } -// NewNode implements ProviderClient -func (a awsClient) NewNode(ctx context.Context) error { - values := parseValues(a) - - if true { - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws"), a.awsS3Client, true); err != nil { - return err +func (a AwsClient) getAwsTemplatePath() string { + return path.Join(a.tfTemplates, func() string { + switch a.node.NodeType { + case "spot": + return "aws-spot" + default: + return "aws" } + }(), + ) +} + +// NewNode implements ProviderClient +func (a AwsClient) NewNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + values := parseValues(a, sshPath) - defer a.SaveToDbGuranteed(ctx) + if err := utils.MakeTfWorkFileReady(a.node.NodeId, a.getAwsTemplatePath(), a.awsS3Client, true); err != nil { + return err } + defer a.SaveToDbGuranteed(ctx) + // upload the final state to the db, upsert if db is already present // apply the tf file @@ -504,8 +241,9 @@ func (a awsClient) NewNode(ctx context.Context) error { } // DeleteNode implements ProviderClient -func (a awsClient) DeleteNode(ctx context.Context) error { - values := parseValues(a) +func (a AwsClient) DeleteNode(ctx context.Context) error { + sshPath := path.Join("/tmp/ssh", a.accountName) + values := parseValues(a, sshPath) /* steps: @@ -516,7 +254,7 @@ func (a awsClient) DeleteNode(ctx context.Context) error { - delete final state */ - if err := utils.MakeTfWorkFileReady(a.node.NodeId, path.Join(a.tfTemplates, "aws"), a.awsS3Client, false); err != nil { + if err := utils.MakeTfWorkFileReady(a.node.NodeId, a.getAwsTemplatePath(), a.awsS3Client, false); err != nil { return err } @@ -540,17 +278,16 @@ func NewAwsProviderClient(node AWSNode, cpd common.CommonProviderData, apc AwsPr return nil, err } - return awsClient{ + return AwsClient{ node: node, awsS3Client: awsS3Client, accessKey: apc.AccessKey, accessSecret: apc.AccessSecret, - accountId: apc.AccountName, + accountName: apc.AccountName, tfTemplates: cpd.TfTemplates, labels: cpd.Labels, taints: cpd.Taints, - SSHPath: cpd.SSHPath, }, nil } diff --git a/apps/nodectrl/internal/domain/provider-client-fx.go b/apps/nodectrl/internal/domain/provider-client-fx.go index f1f7368da..54879ee20 100644 --- a/apps/nodectrl/internal/domain/provider-client-fx.go +++ b/apps/nodectrl/internal/domain/provider-client-fx.go @@ -6,6 +6,7 @@ import ( "go.uber.org/fx" "kloudlite.io/apps/nodectrl/internal/domain/aws" + awsspot "kloudlite.io/apps/nodectrl/internal/domain/aws-spot" "kloudlite.io/apps/nodectrl/internal/domain/common" "kloudlite.io/apps/nodectrl/internal/domain/do" "kloudlite.io/apps/nodectrl/internal/domain/utils" @@ -44,6 +45,7 @@ var ProviderClientFx = fx.Module("provider-client-fx", } return aws.NewAwsProviderClient(node, cpd, apc) + case "azure": panic("not implemented") case "do": diff --git a/apps/nodectrl/internal/env/env.go b/apps/nodectrl/internal/env/env.go index 4cb170063..7197ebe2b 100644 --- a/apps/nodectrl/internal/env/env.go +++ b/apps/nodectrl/internal/env/env.go @@ -9,9 +9,6 @@ type Env struct { NodeConfig string `env:"NODE_CONFIG" required:"true"` ProviderConfig string `env:"PROVIDER_CONFIG" required:"true"` - // DBUrl string `env:"DB_URL" required:"true"` - // DBName string `env:"DB_NAME" required:"true"` - AWSProviderConfig string `env:"AWS_PROVIDER_CONFIG"` GCPProviderConfig string `env:"GCP_PROVIDER_CONFIG"` AzureProviderConfig string `env:"AZURE_PROVIDER_CONFIG"` diff --git a/apps/nodectrl/terraform/aws-spot/init.sh b/apps/nodectrl/terraform/aws-spot/init.sh new file mode 100644 index 000000000..2c2ad0a7c --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +mkdir /k3s +cat >> /k3s/data.yaml << EOF +${nodeConfigYaml} +EOF + +cat >> /root/.ssh/authorized_keys << EOF +${pubkey} +EOF diff --git a/apps/nodectrl/terraform/aws-spot/resource.tf b/apps/nodectrl/terraform/aws-spot/resource.tf new file mode 100644 index 000000000..6a104d34f --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/resource.tf @@ -0,0 +1,181 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.3.0" + } + } + required_version = ">= 1.2.0" +} + +provider "aws" { + region = var.region + access_key = var.access_key + secret_key = var.secret_key +} + +output "node-name" { + value = var.node_id +} + + +data "aws_caller_identity" "current" {} + +resource "aws_security_group" "sg" { + + name = "sg-${var.node_id}" + + ingress { + from_port = 22 + protocol = "tcp" + to_port = 22 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 2379 + protocol = "tcp" + to_port = 2379 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 2380 + protocol = "tcp" + to_port = 2380 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 6443 + protocol = "tcp" + to_port = 6443 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 8472 + protocol = "udp" + to_port = 8472 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 9100 + protocol = "tcp" + to_port = 9100 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 51820 + protocol = "udp" + to_port = 51820 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 51821 + protocol = "udp" + to_port = 51821 + cidr_blocks = ["0.0.0.0/0"] + } + + + ingress { + from_port = 10250 + protocol = "tcp" + to_port = 10250 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 80 + protocol = "tcp" + to_port = 80 + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 443 + protocol = "tcp" + to_port = 443 + cidr_blocks = ["0.0.0.0/0"] + } + + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + +} + + +resource "aws_launch_template" "spot-template" { + name = var.node_id + image_id = "ami-0e63f370aa626048d" + + + user_data = base64encode(templatefile("./init.sh", { + pubkey = file("${var.keys-path}/access.pub") + nodeConfigYaml = file("${var.keys-path}/data.yaml") + # hostname = var.node_id + })) + + + + block_device_mappings { + device_name = "/dev/sda1" + ebs { + volume_size = 40 + } + } + + network_interfaces { + associate_public_ip_address = true + security_groups = [aws_security_group.sg.id] + } + + tag_specifications { + resource_type = "instance" + tags = { + Name = var.node_id + } + } +} + + + +resource "aws_spot_fleet_request" "byoc-spot-node" { + iam_fleet_role = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-ec2-spot-fleet-tagging-role" + + target_capacity = 1 + + terminate_instances_on_delete = true + on_demand_target_capacity = 0 + allocation_strategy = "priceCapacityOptimized" + on_demand_allocation_strategy = "lowestPrice" + + + launch_template_config { + launch_template_specification { + id = aws_launch_template.spot-template.id + version = "1" + } + overrides { + instance_requirements { + vcpu_count { + min = 4 + max = 4 + } + memory_mib { + min = 8192 + max = 8192 + } + } + } + } +} diff --git a/apps/nodectrl/terraform/aws-spot/variables.tf b/apps/nodectrl/terraform/aws-spot/variables.tf new file mode 100644 index 000000000..50e52813a --- /dev/null +++ b/apps/nodectrl/terraform/aws-spot/variables.tf @@ -0,0 +1,19 @@ +variable "access_key" { + default = "" +} + +variable "secret_key" { + default = "" +} + +variable "region" { + default = "" +} + +variable "node_id" { + default = "" +} + +variable "keys-path" { + default = "" +} diff --git a/apps/nodectrl/terraform/aws/resource.tf b/apps/nodectrl/terraform/aws/resource.tf index 0ad99ab34..736c263a9 100644 --- a/apps/nodectrl/terraform/aws/resource.tf +++ b/apps/nodectrl/terraform/aws/resource.tf @@ -165,7 +165,6 @@ resource "aws_instance" "byoc-node" { } - output "node-ip" { value = aws_instance.byoc-node.public_ip }