Skip to content
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ jobs:
cloud:
name: e2e k8s-${{ matrix.k3s }}
runs-on: ubuntu-latest
# Fork pull requests cannot access the required secrets, so only repository branches may run this job.
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository

permissions:
id-token: write
Expand Down Expand Up @@ -156,6 +158,8 @@ jobs:
robot:
name: e2e robot
runs-on: ubuntu-latest
# Fork pull requests cannot access the required secrets, so only repository branches may run this job.
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository

permissions:
id-token: write
Expand Down
2 changes: 1 addition & 1 deletion hcloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func NewCloud(cidr string) (cloudprovider.Interface, error) {

robotClient = robot.NewRateLimitedClient(
cfg.Robot.RateLimitWaitTime,
robot.NewCachedClient(cfg.Robot.CacheTimeout, c),
robot.NewCachedClient(cfg.Robot.CacheTimeout, robot.NewClient(c)),
)
}

Expand Down
5 changes: 3 additions & 2 deletions hcloud/cloud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/client-go/tools/record"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud/schema"
Expand All @@ -40,7 +41,7 @@ type testEnv struct {
Server *httptest.Server
Mux *http.ServeMux
Client *hcloud.Client
RobotClient hrobot.RobotClient
RobotClient robot.Client
Recorder record.EventRecorder
Cfg config.HCCMConfiguration
}
Expand Down Expand Up @@ -75,7 +76,7 @@ func newTestEnv() testEnv {
Server: server,
Mux: mux,
Client: client,
RobotClient: robotClient,
RobotClient: robot.NewClient(robotClient),
Recorder: recorder,
Cfg: cfg,
}
Expand Down
66 changes: 66 additions & 0 deletions hcloud/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"os"
"reflect"
"testing"
"time"

"github.com/stretchr/testify/assert"
hrobotmodels "github.com/syself/hrobot-go/models"
Expand All @@ -32,6 +33,7 @@ import (
cloudprovider "k8s.io/cloud-provider"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud/schema"
)
Expand Down Expand Up @@ -189,6 +191,70 @@ func TestInstances_InstanceExists(t *testing.T) {
}
}

// TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill verifies
// that InstanceExists refreshes the Robot cache after a name lookup miss,
// so a bare-metal server created after the cache was warmed is found.
func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) {
env := newTestEnv()
defer env.Teardown()

servers := make([]hrobotmodels.Server, 0, 2)
servers = append(servers,
hrobotmodels.Server{
ServerIP: "233.252.0.123",
ServerIPv6Net: "2a01:f48:111:4221::",
ServerNumber: 321,
Name: "robot-server1",
},
)

env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) {
responses := make([]hrobotmodels.ServerResponse, 0, len(servers))
for _, server := range servers {
responses = append(responses, hrobotmodels.ServerResponse{Server: server})
}
json.NewEncoder(w).Encode(responses)
})
env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, _ *http.Request) {
json.NewEncoder(w).Encode(schema.ServerListResponse{Servers: []schema.Server{}})
})

instances := newInstances(
env.Client,
robot.NewCachedClient(time.Hour, env.RobotClient),
env.Recorder,
0,
env.Cfg,
)

exists, err := instances.InstanceExists(context.TODO(), &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "robot-server1"},
})
if err != nil {
t.Fatalf("Unexpected error warming cache: %v", err)
}
if !exists {
t.Fatal("Expected robot-server1 to exist")
}

servers = append(servers, hrobotmodels.Server{
ServerIP: "233.252.0.124",
ServerIPv6Net: "2a01:f48:111:4222::",
ServerNumber: 322,
Name: "robot-server2",
})

exists, err = instances.InstanceExists(context.TODO(), &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "robot-server2"},
})
if err != nil {
t.Fatalf("Unexpected error after creating robot-server2: %v", err)
}
if !exists {
t.Fatal("Expected robot-server2 to exist after it was created")
}
}

func TestInstances_InstanceShutdown(t *testing.T) {
env := newTestEnv()
defer env.Teardown()
Expand Down
16 changes: 14 additions & 2 deletions hcloud/instances_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,23 @@ func getRobotServerByName(c robot.Client, node *corev1.Node) (server *hrobotmode

for i, s := range serverList {
if s.Name == node.Name {
server = &serverList[i]
return &serverList[i], nil
}
}

return server, nil
serverList, err = c.ServerGetListForceRefresh(node.Name)
if err != nil {
return nil, fmt.Errorf("%s: force refresh after cache miss: %w", op, err)
}

for i, s := range serverList {
if s.Name == node.Name {
return &serverList[i], nil
}
}

// No matching Robot server exists for this node, even after refreshing the cached list.
return nil, nil
}

func getRobotServerByID(i *instances, id int, node *corev1.Node) (*hrobotmodels.Server, error) {
Expand Down
35 changes: 35 additions & 0 deletions hcloud/instances_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,38 @@ func TestGetRobotServerByID(t *testing.T) {
})
}
}

func TestGetRobotServerByNameForceRefreshFindsNewServer(t *testing.T) {
robotClientMock := &mocks.RobotClient{}
robotClientMock.Test(t)
robotClientMock.On("ServerGetList").Return([]hrobotmodels.Server{
{ServerNumber: 321, Name: "robot-server1"},
}, nil).Once()
robotClientMock.On("ServerGetListForceRefresh", "robot-server2").Return([]hrobotmodels.Server{
{ServerNumber: 322, Name: "robot-server2"},
}, nil).Once()

server, err := getRobotServerByName(robotClientMock, &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "robot-server2"},
})
require.NoError(t, err)
require.NotNil(t, server)
assert.Equal(t, "robot-server2", server.Name)
}

func TestGetRobotServerByNameReturnsNilAfterForceRefreshMiss(t *testing.T) {
robotClientMock := &mocks.RobotClient{}
robotClientMock.Test(t)
robotClientMock.On("ServerGetList").Return([]hrobotmodels.Server{
{ServerNumber: 321, Name: "robot-server1"},
}, nil).Once()
robotClientMock.On("ServerGetListForceRefresh", "robot-server2").Return([]hrobotmodels.Server{
{ServerNumber: 321, Name: "robot-server1"},
}, nil).Once()

server, err := getRobotServerByName(robotClientMock, &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "robot-server2"},
})
require.NoError(t, err)
assert.Nil(t, server)
}
5 changes: 5 additions & 0 deletions internal/mocks/robot.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ func (m *RobotClient) ServerGetList() ([]hrobotmodels.Server, error) {
return getRobotServers(args, 0), args.Error(1)
}

func (m *RobotClient) ServerGetListForceRefresh(nodeName string) ([]hrobotmodels.Server, error) {
args := m.Called(nodeName)
return getRobotServers(args, 0), args.Error(1)
}

func (m *RobotClient) BootLinuxDelete(id int) (*hrobotmodels.Linux, error) {
panic("this method should not be called")
}
Expand Down
23 changes: 23 additions & 0 deletions internal/robot/adapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package robot

import (
hrobot "github.com/syself/hrobot-go"
hrobotmodels "github.com/syself/hrobot-go/models"
)

// adapter wraps hrobot.RobotClient so it satisfies this package's Client interface.
type adapter struct {
hrobot.RobotClient
}

func NewClient(robotClient hrobot.RobotClient) Client {
if robotClient == nil {
return nil
}

return &adapter{RobotClient: robotClient}
}

func (a *adapter) ServerGetListForceRefresh(_ string) ([]hrobotmodels.Server, error) {
return a.ServerGetList()
}
44 changes: 44 additions & 0 deletions internal/robot/adapter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package robot

import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
hrobot "github.com/syself/hrobot-go"
hrobotmodels "github.com/syself/hrobot-go/models"
)

func TestNewClientNil(t *testing.T) {
assert.Nil(t, NewClient(nil))
}

func TestAdapterServerGetListForceRefresh(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Equal(t, "/robot/server", r.URL.Path)
err := json.NewEncoder(w).Encode([]hrobotmodels.ServerResponse{
{
Server: hrobotmodels.Server{
ServerNumber: 321,
Name: "robot-server1",
},
},
})
assert.NoError(t, err)
}))
defer server.Close()

robotClient := hrobot.NewBasicAuthClient("", "")
robotClient.SetBaseURL(server.URL + "/robot")

client := NewClient(robotClient)
require.NotNil(t, client)

servers, err := client.ServerGetListForceRefresh("robot-server1")
require.NoError(t, err)
require.Len(t, servers, 1)
assert.Equal(t, "robot-server1", servers[0].Name)
}
63 changes: 60 additions & 3 deletions internal/robot/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,26 @@ type cacheRobotClient struct {
timeout time.Duration

lastUpdate time.Time
now func() time.Time
// mutex is necessary to synchronize parallel access to the cache
mutex sync.Mutex

// cache
servers []hrobotmodels.Server
serversByID map[int]*hrobotmodels.Server

// forcedRefreshServerNames tracks which node names already triggered a forced refresh within the current cache timeout window.
forcedRefreshServerNames map[string]time.Time
}

func NewCachedClient(cacheTimeout time.Duration, robotClient Client) Client {
return &cacheRobotClient{
timeout: cacheTimeout,
robotClient: robotClient,
now: time.Now,

serversByID: make(map[int]*hrobotmodels.Server),
serversByID: make(map[int]*hrobotmodels.Server),
forcedRefreshServerNames: make(map[string]time.Time),
}
}

Expand Down Expand Up @@ -57,13 +63,40 @@ func (c *cacheRobotClient) ServerGetList() ([]hrobotmodels.Server, error) {
return c.servers, nil
}

// ServerGetListForceRefresh refreshes the server list immediately, unless the same node already forced a refresh within the current cache timeout window.
func (c *cacheRobotClient) ServerGetListForceRefresh(nodeName string) ([]hrobotmodels.Server, error) {
c.mutex.Lock()
defer c.mutex.Unlock()

if nodeName != "" && c.nodeHasAlreadyForcedRefresh(nodeName) {
if err := c.updateCacheIfNecessary(); err != nil {
return nil, err
}
return c.servers, nil
}

if err := c.refreshCache(); err != nil {
return nil, err
}

if nodeName != "" {
c.forcedRefreshServerNames[nodeName] = c.currentTime()
}

return c.servers, nil
}

// Make sure to lock the mutext before calling updateCacheIfNecessary.
func (c *cacheRobotClient) updateCacheIfNecessary() error {
nextUpdate := c.lastUpdate.Add(c.timeout)
if time.Now().Before(nextUpdate) {
if c.currentTime().Before(nextUpdate) {
return nil
}

return c.refreshCache()
}

func (c *cacheRobotClient) refreshCache() error {
servers, err := c.robotClient.ServerGetList()
if err != nil {
return err
Expand All @@ -79,10 +112,34 @@ func (c *cacheRobotClient) updateCacheIfNecessary() error {
}

// set time of last update
c.lastUpdate = time.Now()
c.lastUpdate = c.currentTime()
return nil
}

// nodeHasAlreadyForcedRefresh reports whether this node already triggered a forced refresh within the current cache timeout window and drops expired entries.
func (c *cacheRobotClient) nodeHasAlreadyForcedRefresh(nodeName string) bool {
forcedAt, found := c.forcedRefreshServerNames[nodeName]
if !found {
return false
}

if c.currentTime().After(forcedAt.Add(c.timeout)) {
delete(c.forcedRefreshServerNames, nodeName)
return false
}

return true
}

// currentTime centralizes access to the clock so tests can inject a deterministic time source via c.now.
func (c *cacheRobotClient) currentTime() time.Time {
if c.now == nil {
return time.Now()
}

return c.now()
}

// ResetGet does not use the cache, as we need up to date information for its function.
func (c *cacheRobotClient) ResetGet(id int) (*hrobotmodels.Reset, error) {
return c.robotClient.ResetGet(id)
Expand Down
Loading
Loading