diff --git a/README.md b/README.md index 9dd39a7f5..25a698d89 100644 --- a/README.md +++ b/README.md @@ -647,7 +647,15 @@ To enable this behavior set `MERGE_DOMAIN_CONFIG` to `true`. xDS Management Server is a gRPC server which implements the [Aggregated Discovery Service (ADS)](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/ads.proto). The xDS Management server serves [Discovery Response](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/discovery.proto#L69) with [Ratelimit Configuration Resources](api/ratelimit/config/ratelimit/v3/rls_conf.proto) and with Type URL `"type.googleapis.com/ratelimit.config.ratelimit.v3.RateLimitConfig"`. + The xDS client in the Rate limit service configure Rate limit service with the provided configuration. +In case of connection failures, the xDS Client retries the connection to the xDS server with exponential backoff and the backoff parameters are configurable. + +1. `XDS_CLIENT_BACKOFF_JITTER`: set to `"true"` to add jitter to the exponential backoff. +2. `XDS_CLIENT_BACKOFF_INITIAL_INTERVAL`: The base amount of time the xDS client waits before retyring the connection after failure. Default: "10s" +3. `XDS_CLIENT_BACKOFF_MAX_INTERVAL`: The max backoff interval is the upper limit on the amount of time the xDS client will wait between retries. After reaching the max backoff interval, the next retries will continue using the max interval. Default: "60s" +4. `XDS_CLIENT_BACKOFF_RANDOM_FACTOR`: This is a factor by which the initial interval is multiplied to calculate the next backoff interval. Default: "0.5" + For more information on xDS protocol please refer to the [envoy proxy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol). You can refer to [the sample xDS configuration management server](examples/xds-sotw-config-server/README.md). diff --git a/go.mod b/go.mod index fe94a09fd..45ef3a66d 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/google/uuid v1.4.0 github.com/gorilla/mux v1.8.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 + github.com/jpillora/backoff v1.0.0 github.com/kavu/go_reuseport v1.5.0 github.com/kelseyhightower/envconfig v1.4.0 github.com/lyft/goruntime v0.3.0 diff --git a/go.sum b/go.sum index 9b62a6f48..2b8a48b3a 100644 --- a/go.sum +++ b/go.sum @@ -77,6 +77,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDa github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/kavu/go_reuseport v1.5.0 h1:UNuiY2OblcqAtVDE8Gsg1kZz8zbBWg907sP1ceBV+bk= github.com/kavu/go_reuseport v1.5.0/go.mod h1:CG8Ee7ceMFSMnx/xr25Vm0qXaj2Z4i5PWoUx+JZ5/CU= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index fcacc212d..9dc9faeeb 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "google.golang.org/grpc/metadata" @@ -11,6 +12,7 @@ import ( "github.com/envoyproxy/go-control-plane/pkg/resource/v3" "github.com/golang/protobuf/ptypes/any" grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" + "github.com/jpillora/backoff" logger "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -67,6 +69,12 @@ func (p *XdsGrpcSotwProvider) Stop() { func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Starting xDS client connection for rate limit configurations") conn := p.initializeAndWatch() + b := &backoff.Backoff{ + Min: p.settings.XdsClientBackoffInitialInterval, + Max: p.settings.XdsClientBackoffMaxInterval, + Factor: p.settings.XdsClientBackoffRandomFactor, + Jitter: p.settings.XdsClientBackoffJitter, + } for retryEvent := range p.connectionRetryChannel { if conn != nil { @@ -76,10 +84,18 @@ func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Stopping xDS client watch for rate limit configurations") break } + d := p.getJitteredExponentialBackOffDuration(b) + logger.Debugf("Sleeping for %s using exponential backoff\n", d) + time.Sleep(d) conn = p.initializeAndWatch() } } +func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration(b *backoff.Backoff) time.Duration { + logger.Debugf("Retry attempt# %f", b.Attempt()) + return b.Duration() +} + func (p *XdsGrpcSotwProvider) initializeAndWatch() *grpc.ClientConn { conn, err := p.getGrpcConnection() if err != nil { @@ -99,11 +115,8 @@ func (p *XdsGrpcSotwProvider) watchConfigs() { resp, err := p.adsClient.Fetch() if err != nil { logger.Errorf("Failed to receive configuration from xDS Management Server: %s", err.Error()) - if sotw.IsConnError(err) { - p.retryGrpcConn() - return - } - p.adsClient.Nack(err.Error()) + p.retryGrpcConn() + return } else { logger.Tracef("Response received from xDS Management Server: %v", resp) p.sendConfigs(resp.Resources) diff --git a/src/settings/settings.go b/src/settings/settings.go index 097047819..f3c3721d2 100644 --- a/src/settings/settings.go +++ b/src/settings/settings.go @@ -69,6 +69,12 @@ type Settings struct { // GrpcClientTlsSAN is the SAN to validate from the client cert during mTLS auth ConfigGrpcXdsServerTlsSAN string `envconfig:"CONFIG_GRPC_XDS_SERVER_TLS_SAN" default:""` + // xDS client backoff configuration + XdsClientBackoffInitialInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_INITIAL_INTERVAL" default:"10s"` + XdsClientBackoffMaxInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_MAX_INTERVAL" default:"60s"` + XdsClientBackoffRandomFactor float64 `envconfig:"XDS_CLIENT_BACKOFF_RANDOM_FACTOR" default:"0.5"` + XdsClientBackoffJitter bool `envconfig:"XDS_CLIENT_BACKOFF_JITTER" default:"true"` + // Stats-related settings UseStatsd bool `envconfig:"USE_STATSD" default:"true"` StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"`