From 1665e66e83bb179b880923d213a41a92e1614a51 Mon Sep 17 00:00:00 2001 From: "alekhya.kondapuram" Date: Fri, 9 Feb 2024 12:34:02 -0800 Subject: [PATCH 1/5] Retry the grpc connection when there's an error Signed-off-by: alekhya.kondapuram --- src/provider/xds_grpc_sotw_provider.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index fcacc212d..5858cccab 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -99,11 +99,8 @@ func (p *XdsGrpcSotwProvider) watchConfigs() { resp, err := p.adsClient.Fetch() if err != nil { logger.Errorf("Failed to receive configuration from xDS Management Server: %s", err.Error()) - if sotw.IsConnError(err) { - p.retryGrpcConn() - return - } - p.adsClient.Nack(err.Error()) + p.retryGrpcConn() + return } else { logger.Tracef("Response received from xDS Management Server: %v", resp) p.sendConfigs(resp.Resources) From bc34412e154ae2e889cc4ac488581ac710af407e Mon Sep 17 00:00:00 2001 From: "alekhya.kondapuram" Date: Wed, 14 Feb 2024 19:04:56 -0800 Subject: [PATCH 2/5] Add exponential backoff Signed-off-by: alekhya.kondapuram --- go.mod | 1 + go.sum | 2 ++ src/provider/xds_grpc_sotw_provider.go | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/go.mod b/go.mod index fe94a09fd..45ef3a66d 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/google/uuid v1.4.0 github.com/gorilla/mux v1.8.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 + github.com/jpillora/backoff v1.0.0 github.com/kavu/go_reuseport v1.5.0 github.com/kelseyhightower/envconfig v1.4.0 github.com/lyft/goruntime v0.3.0 diff --git a/go.sum b/go.sum index 9b62a6f48..2b8a48b3a 100644 --- a/go.sum +++ b/go.sum @@ -77,6 +77,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDa github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod 
h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/kavu/go_reuseport v1.5.0 h1:UNuiY2OblcqAtVDE8Gsg1kZz8zbBWg907sP1ceBV+bk= github.com/kavu/go_reuseport v1.5.0/go.mod h1:CG8Ee7ceMFSMnx/xr25Vm0qXaj2Z4i5PWoUx+JZ5/CU= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index 5858cccab..1f45ddbf3 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "google.golang.org/grpc/metadata" @@ -11,6 +12,7 @@ import ( "github.com/envoyproxy/go-control-plane/pkg/resource/v3" "github.com/golang/protobuf/ptypes/any" grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" + "github.com/jpillora/backoff" logger "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -76,10 +78,25 @@ func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Stopping xDS client watch for rate limit configurations") break } + + d := p.getJitteredExponentialBackOffDuration() + logger.Infof("Sleeping for %s using exponential backoff\n", d) + time.Sleep(d) conn = p.initializeAndWatch() } } +func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration() time.Duration { + b := &backoff.Backoff{ + Min: 10 * time.Second, + Max: 10 * time.Minute, + Factor: 0.5, + Jitter: true, + } + logger.Infof("Retry attempt# %f", b.Attempt()) + return b.Duration() +} + func (p *XdsGrpcSotwProvider) initializeAndWatch() *grpc.ClientConn { conn, err := 
p.getGrpcConnection() if err != nil { From 1e4ad3bd491ee8bd3e5ba430487e33943d7ef2c1 Mon Sep 17 00:00:00 2001 From: "alekhya.kondapuram" Date: Thu, 15 Feb 2024 13:12:18 -0800 Subject: [PATCH 3/5] Refactor Signed-off-by: alekhya.kondapuram --- src/provider/xds_grpc_sotw_provider.go | 17 ++++++++--------- src/settings/settings.go | 6 ++++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index 1f45ddbf3..0e59bcf00 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -69,6 +69,12 @@ func (p *XdsGrpcSotwProvider) Stop() { func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Starting xDS client connection for rate limit configurations") conn := p.initializeAndWatch() + b := &backoff.Backoff{ + Min: p.settings.XdsClientBackoffInitialInterval, + Max: p.settings.XdsClientBackoffMaxInterval, + Factor: p.settings.XdsClientBackoffRandomFactor, + Jitter: p.settings.XdsClientBackoffJitter, + } for retryEvent := range p.connectionRetryChannel { if conn != nil { @@ -78,21 +84,14 @@ func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Stopping xDS client watch for rate limit configurations") break } - - d := p.getJitteredExponentialBackOffDuration() + d := p.getJitteredExponentialBackOffDuration(b) logger.Infof("Sleeping for %s using exponential backoff\n", d) time.Sleep(d) conn = p.initializeAndWatch() } } -func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration() time.Duration { - b := &backoff.Backoff{ - Min: 10 * time.Second, - Max: 10 * time.Minute, - Factor: 0.5, - Jitter: true, - } +func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration(b *backoff.Backoff) time.Duration { logger.Infof("Retry attempt# %f", b.Attempt()) return b.Duration() } diff --git a/src/settings/settings.go b/src/settings/settings.go index 097047819..f3c3721d2 100644 --- a/src/settings/settings.go +++ 
b/src/settings/settings.go @@ -69,6 +69,12 @@ type Settings struct { // GrpcClientTlsSAN is the SAN to validate from the client cert during mTLS auth ConfigGrpcXdsServerTlsSAN string `envconfig:"CONFIG_GRPC_XDS_SERVER_TLS_SAN" default:""` + // xDS client backoff configuration + XdsClientBackoffInitialInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_INITIAL_INTERVAL" default:"10s"` + XdsClientBackoffMaxInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_MAX_INTERVAL" default:"60s"` + XdsClientBackoffRandomFactor float64 `envconfig:"XDS_CLIENT_BACKOFF_RANDOM_FACTOR" default:"0.5"` + XdsClientBackoffJitter bool `envconfig:"XDS_CLIENT_BACKOFF_JITTER" default:"true"` + // Stats-related settings UseStatsd bool `envconfig:"USE_STATSD" default:"true"` StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"` From 1e131142e941f3c54d22ca64b2319c44d4b1e847 Mon Sep 17 00:00:00 2001 From: "alekhya.kondapuram" Date: Fri, 23 Feb 2024 12:41:54 -0800 Subject: [PATCH 4/5] Update the README and set log level to debug Signed-off-by: alekhya.kondapuram --- README.md | 5 +++++ src/provider/xds_grpc_sotw_provider.go | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9dd39a7f5..3f1fdfd64 100644 --- a/README.md +++ b/README.md @@ -647,7 +647,12 @@ To enable this behavior set `MERGE_DOMAIN_CONFIG` to `true`. xDS Management Server is a gRPC server which implements the [Aggregated Discovery Service (ADS)](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/ads.proto). The xDS Management server serves [Discovery Response](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/discovery.proto#L69) with [Ratelimit Configuration Resources](api/ratelimit/config/ratelimit/v3/rls_conf.proto) and with Type URL `"type.googleapis.com/ratelimit.config.ratelimit.v3.RateLimitConfig"`. 
+ The xDS client in the Rate limit service configure Rate limit service with the provided configuration. +In case of connection failures, the xDS Client retries the connection to the xDS server with exponential backoff and the backoff parameters are configurable. + +`XDS_CLIENT_BACKOFF_JITTER`: set to `"true"` to add jitter to the exponential backoff. + For more information on xDS protocol please refer to the [envoy proxy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol). You can refer to [the sample xDS configuration management server](examples/xds-sotw-config-server/README.md). diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index 0e59bcf00..9dc9faeeb 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -85,14 +85,14 @@ func (p *XdsGrpcSotwProvider) initXdsClient() { break } d := p.getJitteredExponentialBackOffDuration(b) - logger.Infof("Sleeping for %s using exponential backoff\n", d) + logger.Debugf("Sleeping for %s using exponential backoff\n", d) time.Sleep(d) conn = p.initializeAndWatch() } } func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration(b *backoff.Backoff) time.Duration { - logger.Infof("Retry attempt# %f", b.Attempt()) + logger.Debugf("Retry attempt# %f", b.Attempt()) return b.Duration() } From 32f546baaab7016a6b1a492c8461940cb4457e14 Mon Sep 17 00:00:00 2001 From: "alekhya.kondapuram" Date: Fri, 23 Feb 2024 13:32:16 -0800 Subject: [PATCH 5/5] Update README Signed-off-by: alekhya.kondapuram --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3f1fdfd64..25a698d89 100644 --- a/README.md +++ b/README.md @@ -651,7 +651,10 @@ and with Type URL `"type.googleapis.com/ratelimit.config.ratelimit.v3.RateLimitC The xDS client in the Rate limit service configure Rate limit service with the provided configuration. 
In case of connection failures, the xDS Client retries the connection to the xDS server with exponential backoff and the backoff parameters are configurable. -`XDS_CLIENT_BACKOFF_JITTER`: set to `"true"` to add jitter to the exponential backoff. +1. `XDS_CLIENT_BACKOFF_JITTER`: set to `"true"` to add jitter to the exponential backoff. +2. `XDS_CLIENT_BACKOFF_INITIAL_INTERVAL`: The base amount of time the xDS client waits before retrying the connection after a failure. Default: "10s" +3. `XDS_CLIENT_BACKOFF_MAX_INTERVAL`: The max backoff interval is the upper limit on the amount of time the xDS client will wait between retries. After reaching the max backoff interval, the next retries will continue using the max interval. Default: "60s" +4. `XDS_CLIENT_BACKOFF_RANDOM_FACTOR`: This is a factor by which the initial interval is multiplied to calculate the next backoff interval. Default: "0.5" For more information on xDS protocol please refer to the [envoy proxy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol).