From 0163b7ae5d5a0426618fed8f71526272cf2eeb01 Mon Sep 17 00:00:00 2001 From: David Eads Date: Tue, 4 Oct 2022 12:35:51 -0400 Subject: [PATCH 1/2] add etcd shim to microshift until we package proper etcd separately --- cmd/microshift/main.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cmd/microshift/main.go b/cmd/microshift/main.go index 8cbbc6932d..d46e1461a9 100644 --- a/cmd/microshift/main.go +++ b/cmd/microshift/main.go @@ -13,6 +13,7 @@ import ( cmds "github.com/openshift/microshift/pkg/cmd" "github.com/openshift/microshift/pkg/config" + "go.etcd.io/etcd/server/v3/etcdmain" ) func main() { @@ -43,7 +44,23 @@ func newCommand() *cobra.Command { ioStreams := genericclioptions.IOStreams{In: os.Stdin, Out: os.Stdout, ErrOut: os.Stderr} cmd.AddCommand(cmds.NewRunMicroshiftCommand()) + cmd.AddCommand(temporaryEtcdShim()) cmd.AddCommand(cmds.NewVersionCommand(ioStreams)) cmd.AddCommand(cmds.NewShowConfigCommand(ioStreams)) + + return cmd +} + +func temporaryEtcdShim() *cobra.Command { + cmd := &cobra.Command{ + Use: "etcd", + Short: "Run not-quite-etcd", + DisableFlagParsing: true, + RunE: func(cmd *cobra.Command, args []string) error { + etcdmain.Main(os.Args[1:]) + return nil + }, + } + return cmd } From 733c1341b69cf538d31fb0533336f1702c6529d8 Mon Sep 17 00:00:00 2001 From: David Eads Date: Tue, 4 Oct 2022 12:53:37 -0400 Subject: [PATCH 2/2] add etcd cmd vendor --- .../etcd/client/v3/leasing/cache.go | 306 ++++++++++ .../go.etcd.io/etcd/client/v3/leasing/doc.go | 46 ++ .../go.etcd.io/etcd/client/v3/leasing/kv.go | 479 ++++++++++++++++ .../go.etcd.io/etcd/client/v3/leasing/txn.go | 223 ++++++++ .../go.etcd.io/etcd/client/v3/leasing/util.go | 108 ++++ .../etcd/client/v3/namespace/doc.go | 43 ++ .../go.etcd.io/etcd/client/v3/namespace/kv.go | 206 +++++++ .../etcd/client/v3/namespace/lease.go | 57 ++ .../etcd/client/v3/namespace/util.go | 42 ++ .../etcd/client/v3/namespace/watch.go | 83 +++ .../client/v3/naming/endpoints/endpoints.go | 82 +++ 
.../v3/naming/endpoints/endpoints_impl.go | 175 ++++++ .../v3/naming/endpoints/internal/update.go | 38 ++ .../go.etcd.io/etcd/client/v3/ordering/doc.go | 42 ++ .../go.etcd.io/etcd/client/v3/ordering/kv.go | 149 +++++ .../etcd/client/v3/ordering/util.go | 42 ++ .../etcd/pkg/v3/osutil/interrupt_unix.go | 85 +++ .../etcd/pkg/v3/osutil/interrupt_windows.go | 37 ++ .../go.etcd.io/etcd/pkg/v3/osutil/osutil.go | 41 ++ .../go.etcd.io/etcd/pkg/v3/osutil/signal.go | 22 + .../etcd/pkg/v3/osutil/signal_linux.go | 31 + .../etcd/server/v3/etcdmain/config.go | 471 +++++++++++++++ .../go.etcd.io/etcd/server/v3/etcdmain/doc.go | 16 + .../etcd/server/v3/etcdmain/etcd.go | 488 ++++++++++++++++ .../etcd/server/v3/etcdmain/gateway.go | 183 ++++++ .../etcd/server/v3/etcdmain/grpc_proxy.go | 535 ++++++++++++++++++ .../etcd/server/v3/etcdmain/help.go | 262 +++++++++ .../etcd/server/v3/etcdmain/main.go | 51 ++ .../etcd/server/v3/etcdmain/util.go | 97 ++++ .../etcd/server/v3/proxy/grpcproxy/auth.go | 115 ++++ .../server/v3/proxy/grpcproxy/cache/store.go | 172 ++++++ .../etcd/server/v3/proxy/grpcproxy/cluster.go | 213 +++++++ .../etcd/server/v3/proxy/grpcproxy/doc.go | 16 + .../server/v3/proxy/grpcproxy/election.go | 65 +++ .../etcd/server/v3/proxy/grpcproxy/health.go | 76 +++ .../etcd/server/v3/proxy/grpcproxy/kv.go | 232 ++++++++ .../etcd/server/v3/proxy/grpcproxy/leader.go | 113 ++++ .../etcd/server/v3/proxy/grpcproxy/lease.go | 384 +++++++++++++ .../etcd/server/v3/proxy/grpcproxy/lock.go | 38 ++ .../server/v3/proxy/grpcproxy/maintenance.go | 95 ++++ .../etcd/server/v3/proxy/grpcproxy/metrics.go | 121 ++++ .../server/v3/proxy/grpcproxy/register.go | 102 ++++ .../etcd/server/v3/proxy/grpcproxy/util.go | 75 +++ .../etcd/server/v3/proxy/grpcproxy/watch.go | 313 ++++++++++ .../v3/proxy/grpcproxy/watch_broadcast.go | 166 ++++++ .../v3/proxy/grpcproxy/watch_broadcasts.go | 135 +++++ .../server/v3/proxy/grpcproxy/watch_ranges.go | 69 +++ .../etcd/server/v3/proxy/grpcproxy/watcher.go | 130 +++++ 
.../server/v3/proxy/httpproxy/director.go | 179 ++++++ .../etcd/server/v3/proxy/httpproxy/doc.go | 18 + .../etcd/server/v3/proxy/httpproxy/metrics.go | 90 +++ .../etcd/server/v3/proxy/httpproxy/proxy.go | 121 ++++ .../etcd/server/v3/proxy/httpproxy/reverse.go | 227 ++++++++ .../etcd/server/v3/proxy/tcpproxy/doc.go | 16 + .../server/v3/proxy/tcpproxy/userspace.go | 231 ++++++++ vendor/modules.txt | 11 + 56 files changed, 7963 insertions(+) create mode 100644 vendor/go.etcd.io/etcd/client/v3/leasing/cache.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/leasing/doc.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/leasing/kv.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/leasing/txn.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/leasing/util.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/namespace/doc.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/namespace/kv.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/namespace/lease.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/namespace/util.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/namespace/watch.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints_impl.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/naming/endpoints/internal/update.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/ordering/doc.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/ordering/kv.go create mode 100644 vendor/go.etcd.io/etcd/client/v3/ordering/util.go create mode 100644 vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_unix.go create mode 100644 vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_windows.go create mode 100644 vendor/go.etcd.io/etcd/pkg/v3/osutil/osutil.go create mode 100644 vendor/go.etcd.io/etcd/pkg/v3/osutil/signal.go create mode 100644 vendor/go.etcd.io/etcd/pkg/v3/osutil/signal_linux.go create mode 100644 
vendor/go.etcd.io/etcd/server/v3/etcdmain/config.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/doc.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/etcd.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/gateway.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/grpc_proxy.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/help.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/main.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/etcdmain/util.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/auth.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache/store.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cluster.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/doc.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/election.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/health.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/kv.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/leader.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lease.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lock.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/maintenance.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/metrics.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/register.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/util.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watch.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watch_broadcast.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watch_broadcasts.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watch_ranges.go create mode 100644 
vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watcher.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/director.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/doc.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/metrics.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/proxy.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/reverse.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/doc.go create mode 100644 vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/userspace.go diff --git a/vendor/go.etcd.io/etcd/client/v3/leasing/cache.go b/vendor/go.etcd.io/etcd/client/v3/leasing/cache.go new file mode 100644 index 0000000000..214ee2fc19 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/leasing/cache.go @@ -0,0 +1,306 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package leasing + +import ( + "context" + "strings" + "sync" + "time" + + v3pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/mvccpb" + v3 "go.etcd.io/etcd/client/v3" +) + +const revokeBackoff = 2 * time.Second + +type leaseCache struct { + mu sync.RWMutex + entries map[string]*leaseKey + revokes map[string]time.Time + header *v3pb.ResponseHeader +} + +type leaseKey struct { + response *v3.GetResponse + // rev is the leasing key revision. 
+ rev int64 + waitc chan struct{} +} + +func (lc *leaseCache) Rev(key string) int64 { + lc.mu.RLock() + defer lc.mu.RUnlock() + if li := lc.entries[key]; li != nil { + return li.rev + } + return 0 +} + +func (lc *leaseCache) Lock(key string) (chan<- struct{}, int64) { + lc.mu.Lock() + defer lc.mu.Unlock() + if li := lc.entries[key]; li != nil { + li.waitc = make(chan struct{}) + return li.waitc, li.rev + } + return nil, 0 +} + +func (lc *leaseCache) LockRange(begin, end string) (ret []chan<- struct{}) { + lc.mu.Lock() + defer lc.mu.Unlock() + for k, li := range lc.entries { + if inRange(k, begin, end) { + li.waitc = make(chan struct{}) + ret = append(ret, li.waitc) + } + } + return ret +} + +func inRange(k, begin, end string) bool { + if strings.Compare(k, begin) < 0 { + return false + } + if end != "\x00" && strings.Compare(k, end) >= 0 { + return false + } + return true +} + +func (lc *leaseCache) LockWriteOps(ops []v3.Op) (ret []chan<- struct{}) { + for _, op := range ops { + if op.IsGet() { + continue + } + key := string(op.KeyBytes()) + if end := string(op.RangeBytes()); end == "" { + if wc, _ := lc.Lock(key); wc != nil { + ret = append(ret, wc) + } + } else { + for k := range lc.entries { + if !inRange(k, key, end) { + continue + } + if wc, _ := lc.Lock(k); wc != nil { + ret = append(ret, wc) + } + } + } + } + return ret +} + +func (lc *leaseCache) NotifyOps(ops []v3.Op) (wcs []<-chan struct{}) { + for _, op := range ops { + if op.IsGet() { + if _, wc := lc.notify(string(op.KeyBytes())); wc != nil { + wcs = append(wcs, wc) + } + } + } + return wcs +} + +func (lc *leaseCache) MayAcquire(key string) bool { + lc.mu.RLock() + lr, ok := lc.revokes[key] + lc.mu.RUnlock() + return !ok || time.Since(lr) > revokeBackoff +} + +func (lc *leaseCache) Add(key string, resp *v3.GetResponse, op v3.Op) *v3.GetResponse { + lk := &leaseKey{resp, resp.Header.Revision, closedCh} + lc.mu.Lock() + if lc.header == nil || lc.header.Revision < resp.Header.Revision { + lc.header = 
resp.Header + } + lc.entries[key] = lk + ret := lk.get(op) + lc.mu.Unlock() + return ret +} + +func (lc *leaseCache) Update(key, val []byte, respHeader *v3pb.ResponseHeader) { + li := lc.entries[string(key)] + if li == nil { + return + } + cacheResp := li.response + if len(cacheResp.Kvs) == 0 { + kv := &mvccpb.KeyValue{ + Key: key, + CreateRevision: respHeader.Revision, + } + cacheResp.Kvs = append(cacheResp.Kvs, kv) + cacheResp.Count = 1 + } + cacheResp.Kvs[0].Version++ + if cacheResp.Kvs[0].ModRevision < respHeader.Revision { + cacheResp.Header = respHeader + cacheResp.Kvs[0].ModRevision = respHeader.Revision + cacheResp.Kvs[0].Value = val + } +} + +func (lc *leaseCache) Delete(key string, hdr *v3pb.ResponseHeader) { + lc.mu.Lock() + defer lc.mu.Unlock() + lc.delete(key, hdr) +} + +func (lc *leaseCache) delete(key string, hdr *v3pb.ResponseHeader) { + if li := lc.entries[key]; li != nil && hdr.Revision >= li.response.Header.Revision { + li.response.Kvs = nil + li.response.Header = copyHeader(hdr) + } +} + +func (lc *leaseCache) Evict(key string) (rev int64) { + lc.mu.Lock() + defer lc.mu.Unlock() + if li := lc.entries[key]; li != nil { + rev = li.rev + delete(lc.entries, key) + lc.revokes[key] = time.Now() + } + return rev +} + +func (lc *leaseCache) EvictRange(key, end string) { + lc.mu.Lock() + defer lc.mu.Unlock() + for k := range lc.entries { + if inRange(k, key, end) { + delete(lc.entries, key) + lc.revokes[key] = time.Now() + } + } +} + +func isBadOp(op v3.Op) bool { return op.Rev() > 0 || len(op.RangeBytes()) > 0 } + +func (lc *leaseCache) Get(ctx context.Context, op v3.Op) (*v3.GetResponse, bool) { + if isBadOp(op) { + return nil, false + } + key := string(op.KeyBytes()) + li, wc := lc.notify(key) + if li == nil { + return nil, true + } + select { + case <-wc: + case <-ctx.Done(): + return nil, true + } + lc.mu.RLock() + lk := *li + ret := lk.get(op) + lc.mu.RUnlock() + return ret, true +} + +func (lk *leaseKey) get(op v3.Op) *v3.GetResponse { + ret := 
*lk.response + ret.Header = copyHeader(ret.Header) + empty := len(ret.Kvs) == 0 || op.IsCountOnly() + empty = empty || (op.MinModRev() > ret.Kvs[0].ModRevision) + empty = empty || (op.MaxModRev() != 0 && op.MaxModRev() < ret.Kvs[0].ModRevision) + empty = empty || (op.MinCreateRev() > ret.Kvs[0].CreateRevision) + empty = empty || (op.MaxCreateRev() != 0 && op.MaxCreateRev() < ret.Kvs[0].CreateRevision) + if empty { + ret.Kvs = nil + } else { + kv := *ret.Kvs[0] + kv.Key = make([]byte, len(kv.Key)) + copy(kv.Key, ret.Kvs[0].Key) + if !op.IsKeysOnly() { + kv.Value = make([]byte, len(kv.Value)) + copy(kv.Value, ret.Kvs[0].Value) + } + ret.Kvs = []*mvccpb.KeyValue{&kv} + } + return &ret +} + +func (lc *leaseCache) notify(key string) (*leaseKey, <-chan struct{}) { + lc.mu.RLock() + defer lc.mu.RUnlock() + if li := lc.entries[key]; li != nil { + return li, li.waitc + } + return nil, nil +} + +func (lc *leaseCache) clearOldRevokes(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case <-time.After(time.Second): + lc.mu.Lock() + for k, lr := range lc.revokes { + if time.Since(lr.Add(revokeBackoff)) > 0 { + delete(lc.revokes, k) + } + } + lc.mu.Unlock() + } + } +} + +func (lc *leaseCache) evalCmp(cmps []v3.Cmp) (cmpVal bool, ok bool) { + for _, cmp := range cmps { + if len(cmp.RangeEnd) > 0 { + return false, false + } + lk := lc.entries[string(cmp.Key)] + if lk == nil { + return false, false + } + if !evalCmp(lk.response, cmp) { + return false, true + } + } + return true, true +} + +func (lc *leaseCache) evalOps(ops []v3.Op) ([]*v3pb.ResponseOp, bool) { + resps := make([]*v3pb.ResponseOp, len(ops)) + for i, op := range ops { + if !op.IsGet() || isBadOp(op) { + // TODO: support read-only Txn + return nil, false + } + lk := lc.entries[string(op.KeyBytes())] + if lk == nil { + return nil, false + } + resp := lk.get(op) + if resp == nil { + return nil, false + } + resps[i] = &v3pb.ResponseOp{ + Response: &v3pb.ResponseOp_ResponseRange{ + ResponseRange: 
(*v3pb.RangeResponse)(resp), + }, + } + } + return resps, true +} diff --git a/vendor/go.etcd.io/etcd/client/v3/leasing/doc.go b/vendor/go.etcd.io/etcd/client/v3/leasing/doc.go new file mode 100644 index 0000000000..fc97fc8826 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/leasing/doc.go @@ -0,0 +1,46 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package leasing serves linearizable reads from a local cache by acquiring +// exclusive write access to keys through a client-side leasing protocol. This +// leasing layer can either directly wrap the etcd client or it can be exposed +// through the etcd grpc proxy server, granting multiple clients write access. +// +// First, create a leasing KV from a clientv3.Client 'cli': +// +// lkv, err := leasing.NewKV(cli, "leasing-prefix") +// if err != nil { +// // handle error +// } +// +// A range request for a key "abc" tries to acquire a leasing key so it can cache the range's +// key locally. 
On the server, the leasing key is stored to "leasing-prefix/abc": +// +// resp, err := lkv.Get(context.TODO(), "abc") +// +// Future linearized read requests using 'lkv' will be served locally for the lease's lifetime: +// +// resp, err = lkv.Get(context.TODO(), "abc") +// +// If another leasing client writes to a leased key, then the owner relinquishes its exclusive +// access, permitting the writer to modify the key: +// +// lkv2, err := leasing.NewKV(cli, "leasing-prefix") +// if err != nil { +// // handle error +// } +// lkv2.Put(context.TODO(), "abc", "456") +// resp, err = lkv.Get(context.TODO(), "abc") +// +package leasing diff --git a/vendor/go.etcd.io/etcd/client/v3/leasing/kv.go b/vendor/go.etcd.io/etcd/client/v3/leasing/kv.go new file mode 100644 index 0000000000..f0cded20fe --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/leasing/kv.go @@ -0,0 +1,479 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package leasing + +import ( + "context" + "strings" + "sync" + "time" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/mvccpb" + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + v3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/client/v3/concurrency" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type leasingKV struct { + cl *v3.Client + kv v3.KV + pfx string + leases leaseCache + + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup + + sessionOpts []concurrency.SessionOption + session *concurrency.Session + sessionc chan struct{} +} + +var closedCh chan struct{} + +func init() { + closedCh = make(chan struct{}) + close(closedCh) +} + +// NewKV wraps a KV instance so that all requests are wired through a leasing protocol. +func NewKV(cl *v3.Client, pfx string, opts ...concurrency.SessionOption) (v3.KV, func(), error) { + cctx, cancel := context.WithCancel(cl.Ctx()) + lkv := &leasingKV{ + cl: cl, + kv: cl.KV, + pfx: pfx, + leases: leaseCache{revokes: make(map[string]time.Time)}, + ctx: cctx, + cancel: cancel, + sessionOpts: opts, + sessionc: make(chan struct{}), + } + lkv.wg.Add(2) + go func() { + defer lkv.wg.Done() + lkv.monitorSession() + }() + go func() { + defer lkv.wg.Done() + lkv.leases.clearOldRevokes(cctx) + }() + return lkv, lkv.Close, lkv.waitSession(cctx) +} + +func (lkv *leasingKV) Close() { + lkv.cancel() + lkv.wg.Wait() +} + +func (lkv *leasingKV) Get(ctx context.Context, key string, opts ...v3.OpOption) (*v3.GetResponse, error) { + return lkv.get(ctx, v3.OpGet(key, opts...)) +} + +func (lkv *leasingKV) Put(ctx context.Context, key, val string, opts ...v3.OpOption) (*v3.PutResponse, error) { + return lkv.put(ctx, v3.OpPut(key, val, opts...)) +} + +func (lkv *leasingKV) Delete(ctx context.Context, key string, opts ...v3.OpOption) (*v3.DeleteResponse, error) { + return lkv.delete(ctx, v3.OpDelete(key, opts...)) +} + +func (lkv *leasingKV) Do(ctx context.Context, op v3.Op) (v3.OpResponse, 
error) { + switch { + case op.IsGet(): + resp, err := lkv.get(ctx, op) + return resp.OpResponse(), err + case op.IsPut(): + resp, err := lkv.put(ctx, op) + return resp.OpResponse(), err + case op.IsDelete(): + resp, err := lkv.delete(ctx, op) + return resp.OpResponse(), err + case op.IsTxn(): + cmps, thenOps, elseOps := op.Txn() + resp, err := lkv.Txn(ctx).If(cmps...).Then(thenOps...).Else(elseOps...).Commit() + return resp.OpResponse(), err + } + return v3.OpResponse{}, nil +} + +func (lkv *leasingKV) Compact(ctx context.Context, rev int64, opts ...v3.CompactOption) (*v3.CompactResponse, error) { + return lkv.kv.Compact(ctx, rev, opts...) +} + +func (lkv *leasingKV) Txn(ctx context.Context) v3.Txn { + return &txnLeasing{Txn: lkv.kv.Txn(ctx), lkv: lkv, ctx: ctx} +} + +func (lkv *leasingKV) monitorSession() { + for lkv.ctx.Err() == nil { + if lkv.session != nil { + select { + case <-lkv.session.Done(): + case <-lkv.ctx.Done(): + return + } + } + lkv.leases.mu.Lock() + select { + case <-lkv.sessionc: + lkv.sessionc = make(chan struct{}) + default: + } + lkv.leases.entries = make(map[string]*leaseKey) + lkv.leases.mu.Unlock() + + s, err := concurrency.NewSession(lkv.cl, lkv.sessionOpts...) 
+ if err != nil { + continue + } + + lkv.leases.mu.Lock() + lkv.session = s + close(lkv.sessionc) + lkv.leases.mu.Unlock() + } +} + +func (lkv *leasingKV) monitorLease(ctx context.Context, key string, rev int64) { + cctx, cancel := context.WithCancel(lkv.ctx) + defer cancel() + for cctx.Err() == nil { + if rev == 0 { + resp, err := lkv.kv.Get(ctx, lkv.pfx+key) + if err != nil { + continue + } + rev = resp.Header.Revision + if len(resp.Kvs) == 0 || string(resp.Kvs[0].Value) == "REVOKE" { + lkv.rescind(cctx, key, rev) + return + } + } + wch := lkv.cl.Watch(cctx, lkv.pfx+key, v3.WithRev(rev+1)) + for resp := range wch { + for _, ev := range resp.Events { + if string(ev.Kv.Value) != "REVOKE" { + continue + } + if v3.LeaseID(ev.Kv.Lease) == lkv.leaseID() { + lkv.rescind(cctx, key, ev.Kv.ModRevision) + } + return + } + } + rev = 0 + } +} + +// rescind releases a lease from this client. +func (lkv *leasingKV) rescind(ctx context.Context, key string, rev int64) { + if lkv.leases.Evict(key) > rev { + return + } + cmp := v3.Compare(v3.CreateRevision(lkv.pfx+key), "<", rev) + op := v3.OpDelete(lkv.pfx + key) + for ctx.Err() == nil { + if _, err := lkv.kv.Txn(ctx).If(cmp).Then(op).Commit(); err == nil { + return + } + } +} + +func (lkv *leasingKV) waitRescind(ctx context.Context, key string, rev int64) error { + cctx, cancel := context.WithCancel(ctx) + defer cancel() + wch := lkv.cl.Watch(cctx, lkv.pfx+key, v3.WithRev(rev+1)) + for resp := range wch { + for _, ev := range resp.Events { + if ev.Type == v3.EventTypeDelete { + return ctx.Err() + } + } + } + return ctx.Err() +} + +func (lkv *leasingKV) tryModifyOp(ctx context.Context, op v3.Op) (*v3.TxnResponse, chan<- struct{}, error) { + key := string(op.KeyBytes()) + wc, rev := lkv.leases.Lock(key) + cmp := v3.Compare(v3.CreateRevision(lkv.pfx+key), "<", rev+1) + resp, err := lkv.kv.Txn(ctx).If(cmp).Then(op).Commit() + switch { + case err != nil: + lkv.leases.Evict(key) + fallthrough + case !resp.Succeeded: + if wc != nil { + 
close(wc) + } + return nil, nil, err + } + return resp, wc, nil +} + +func (lkv *leasingKV) put(ctx context.Context, op v3.Op) (pr *v3.PutResponse, err error) { + if err := lkv.waitSession(ctx); err != nil { + return nil, err + } + for ctx.Err() == nil { + resp, wc, err := lkv.tryModifyOp(ctx, op) + if err != nil || wc == nil { + resp, err = lkv.revoke(ctx, string(op.KeyBytes()), op) + } + if err != nil { + return nil, err + } + if resp.Succeeded { + lkv.leases.mu.Lock() + lkv.leases.Update(op.KeyBytes(), op.ValueBytes(), resp.Header) + lkv.leases.mu.Unlock() + pr = (*v3.PutResponse)(resp.Responses[0].GetResponsePut()) + pr.Header = resp.Header + } + if wc != nil { + close(wc) + } + if resp.Succeeded { + return pr, nil + } + } + return nil, ctx.Err() +} + +func (lkv *leasingKV) acquire(ctx context.Context, key string, op v3.Op) (*v3.TxnResponse, error) { + for ctx.Err() == nil { + if err := lkv.waitSession(ctx); err != nil { + return nil, err + } + lcmp := v3.Cmp{Key: []byte(key), Target: pb.Compare_LEASE} + resp, err := lkv.kv.Txn(ctx).If( + v3.Compare(v3.CreateRevision(lkv.pfx+key), "=", 0), + v3.Compare(lcmp, "=", 0)). + Then( + op, + v3.OpPut(lkv.pfx+key, "", v3.WithLease(lkv.leaseID()))). 
+ Else( + op, + v3.OpGet(lkv.pfx+key), + ).Commit() + if err == nil { + if !resp.Succeeded { + kvs := resp.Responses[1].GetResponseRange().Kvs + // if txn failed since already owner, lease is acquired + resp.Succeeded = len(kvs) > 0 && v3.LeaseID(kvs[0].Lease) == lkv.leaseID() + } + return resp, nil + } + // retry if transient error + if _, ok := err.(rpctypes.EtcdError); ok { + return nil, err + } + if ev, ok := status.FromError(err); ok && ev.Code() != codes.Unavailable { + return nil, err + } + } + return nil, ctx.Err() +} + +func (lkv *leasingKV) get(ctx context.Context, op v3.Op) (*v3.GetResponse, error) { + do := func() (*v3.GetResponse, error) { + r, err := lkv.kv.Do(ctx, op) + return r.Get(), err + } + if !lkv.readySession() { + return do() + } + + if resp, ok := lkv.leases.Get(ctx, op); resp != nil { + return resp, nil + } else if !ok || op.IsSerializable() { + // must be handled by server or can skip linearization + return do() + } + + key := string(op.KeyBytes()) + if !lkv.leases.MayAcquire(key) { + resp, err := lkv.kv.Do(ctx, op) + return resp.Get(), err + } + + resp, err := lkv.acquire(ctx, key, v3.OpGet(key)) + if err != nil { + return nil, err + } + getResp := (*v3.GetResponse)(resp.Responses[0].GetResponseRange()) + getResp.Header = resp.Header + if resp.Succeeded { + getResp = lkv.leases.Add(key, getResp, op) + lkv.wg.Add(1) + go func() { + defer lkv.wg.Done() + lkv.monitorLease(ctx, key, resp.Header.Revision) + }() + } + return getResp, nil +} + +func (lkv *leasingKV) deleteRangeRPC(ctx context.Context, maxLeaseRev int64, key, end string) (*v3.DeleteResponse, error) { + lkey, lend := lkv.pfx+key, lkv.pfx+end + resp, err := lkv.kv.Txn(ctx).If( + v3.Compare(v3.CreateRevision(lkey).WithRange(lend), "<", maxLeaseRev+1), + ).Then( + v3.OpGet(key, v3.WithRange(end), v3.WithKeysOnly()), + v3.OpDelete(key, v3.WithRange(end)), + ).Commit() + if err != nil { + lkv.leases.EvictRange(key, end) + return nil, err + } + if !resp.Succeeded { + return nil, nil + } 
+ for _, kv := range resp.Responses[0].GetResponseRange().Kvs { + lkv.leases.Delete(string(kv.Key), resp.Header) + } + delResp := (*v3.DeleteResponse)(resp.Responses[1].GetResponseDeleteRange()) + delResp.Header = resp.Header + return delResp, nil +} + +func (lkv *leasingKV) deleteRange(ctx context.Context, op v3.Op) (*v3.DeleteResponse, error) { + key, end := string(op.KeyBytes()), string(op.RangeBytes()) + for ctx.Err() == nil { + maxLeaseRev, err := lkv.revokeRange(ctx, key, end) + if err != nil { + return nil, err + } + wcs := lkv.leases.LockRange(key, end) + delResp, err := lkv.deleteRangeRPC(ctx, maxLeaseRev, key, end) + closeAll(wcs) + if err != nil || delResp != nil { + return delResp, err + } + } + return nil, ctx.Err() +} + +func (lkv *leasingKV) delete(ctx context.Context, op v3.Op) (dr *v3.DeleteResponse, err error) { + if err := lkv.waitSession(ctx); err != nil { + return nil, err + } + if len(op.RangeBytes()) > 0 { + return lkv.deleteRange(ctx, op) + } + key := string(op.KeyBytes()) + for ctx.Err() == nil { + resp, wc, err := lkv.tryModifyOp(ctx, op) + if err != nil || wc == nil { + resp, err = lkv.revoke(ctx, key, op) + } + if err != nil { + // don't know if delete was processed + lkv.leases.Evict(key) + return nil, err + } + if resp.Succeeded { + dr = (*v3.DeleteResponse)(resp.Responses[0].GetResponseDeleteRange()) + dr.Header = resp.Header + lkv.leases.Delete(key, dr.Header) + } + if wc != nil { + close(wc) + } + if resp.Succeeded { + return dr, nil + } + } + return nil, ctx.Err() +} + +func (lkv *leasingKV) revoke(ctx context.Context, key string, op v3.Op) (*v3.TxnResponse, error) { + rev := lkv.leases.Rev(key) + txn := lkv.kv.Txn(ctx).If(v3.Compare(v3.CreateRevision(lkv.pfx+key), "<", rev+1)).Then(op) + resp, err := txn.Else(v3.OpPut(lkv.pfx+key, "REVOKE", v3.WithIgnoreLease())).Commit() + if err != nil || resp.Succeeded { + return resp, err + } + return resp, lkv.waitRescind(ctx, key, resp.Header.Revision) +} + +func (lkv *leasingKV) 
revokeRange(ctx context.Context, begin, end string) (int64, error) { + lkey, lend := lkv.pfx+begin, "" + if len(end) > 0 { + lend = lkv.pfx + end + } + leaseKeys, err := lkv.kv.Get(ctx, lkey, v3.WithRange(lend)) + if err != nil { + return 0, err + } + return lkv.revokeLeaseKvs(ctx, leaseKeys.Kvs) +} + +func (lkv *leasingKV) revokeLeaseKvs(ctx context.Context, kvs []*mvccpb.KeyValue) (int64, error) { + maxLeaseRev := int64(0) + for _, kv := range kvs { + if rev := kv.CreateRevision; rev > maxLeaseRev { + maxLeaseRev = rev + } + if v3.LeaseID(kv.Lease) == lkv.leaseID() { + // don't revoke own keys + continue + } + key := strings.TrimPrefix(string(kv.Key), lkv.pfx) + if _, err := lkv.revoke(ctx, key, v3.OpGet(key)); err != nil { + return 0, err + } + } + return maxLeaseRev, nil +} + +func (lkv *leasingKV) waitSession(ctx context.Context) error { + lkv.leases.mu.RLock() + sessionc := lkv.sessionc + lkv.leases.mu.RUnlock() + select { + case <-sessionc: + return nil + case <-lkv.ctx.Done(): + return lkv.ctx.Err() + case <-ctx.Done(): + return ctx.Err() + } +} + +func (lkv *leasingKV) readySession() bool { + lkv.leases.mu.RLock() + defer lkv.leases.mu.RUnlock() + if lkv.session == nil { + return false + } + select { + case <-lkv.session.Done(): + default: + return true + } + return false +} + +func (lkv *leasingKV) leaseID() v3.LeaseID { + lkv.leases.mu.RLock() + defer lkv.leases.mu.RUnlock() + return lkv.session.Lease() +} diff --git a/vendor/go.etcd.io/etcd/client/v3/leasing/txn.go b/vendor/go.etcd.io/etcd/client/v3/leasing/txn.go new file mode 100644 index 0000000000..30c6aa2e4d --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/leasing/txn.go @@ -0,0 +1,223 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package leasing + +import ( + "context" + "strings" + + v3pb "go.etcd.io/etcd/api/v3/etcdserverpb" + v3 "go.etcd.io/etcd/client/v3" +) + +type txnLeasing struct { + v3.Txn + lkv *leasingKV + ctx context.Context + cs []v3.Cmp + opst []v3.Op + opse []v3.Op +} + +func (txn *txnLeasing) If(cs ...v3.Cmp) v3.Txn { + txn.cs = append(txn.cs, cs...) + txn.Txn = txn.Txn.If(cs...) + return txn +} + +func (txn *txnLeasing) Then(ops ...v3.Op) v3.Txn { + txn.opst = append(txn.opst, ops...) + txn.Txn = txn.Txn.Then(ops...) + return txn +} + +func (txn *txnLeasing) Else(ops ...v3.Op) v3.Txn { + txn.opse = append(txn.opse, ops...) + txn.Txn = txn.Txn.Else(ops...) + return txn +} + +func (txn *txnLeasing) Commit() (*v3.TxnResponse, error) { + if resp, err := txn.eval(); resp != nil || err != nil { + return resp, err + } + return txn.serverTxn() +} + +func (txn *txnLeasing) eval() (*v3.TxnResponse, error) { + // TODO: wait on keys in comparisons + thenOps, elseOps := gatherOps(txn.opst), gatherOps(txn.opse) + ops := make([]v3.Op, 0, len(thenOps)+len(elseOps)) + ops = append(ops, thenOps...) + ops = append(ops, elseOps...) 
+ + for _, ch := range txn.lkv.leases.NotifyOps(ops) { + select { + case <-ch: + case <-txn.ctx.Done(): + return nil, txn.ctx.Err() + } + } + + txn.lkv.leases.mu.RLock() + defer txn.lkv.leases.mu.RUnlock() + succeeded, ok := txn.lkv.leases.evalCmp(txn.cs) + if !ok || txn.lkv.leases.header == nil { + return nil, nil + } + if ops = txn.opst; !succeeded { + ops = txn.opse + } + + resps, ok := txn.lkv.leases.evalOps(ops) + if !ok { + return nil, nil + } + return &v3.TxnResponse{Header: copyHeader(txn.lkv.leases.header), Succeeded: succeeded, Responses: resps}, nil +} + +// fallback computes the ops to fetch all possible conflicting +// leasing keys for a list of ops. +func (txn *txnLeasing) fallback(ops []v3.Op) (fbOps []v3.Op) { + for _, op := range ops { + if op.IsGet() { + continue + } + lkey, lend := txn.lkv.pfx+string(op.KeyBytes()), "" + if len(op.RangeBytes()) > 0 { + lend = txn.lkv.pfx + string(op.RangeBytes()) + } + fbOps = append(fbOps, v3.OpGet(lkey, v3.WithRange(lend))) + } + return fbOps +} + +func (txn *txnLeasing) guardKeys(ops []v3.Op) (cmps []v3.Cmp) { + seen := make(map[string]bool) + for _, op := range ops { + key := string(op.KeyBytes()) + if op.IsGet() || len(op.RangeBytes()) != 0 || seen[key] { + continue + } + rev := txn.lkv.leases.Rev(key) + cmps = append(cmps, v3.Compare(v3.CreateRevision(txn.lkv.pfx+key), "<", rev+1)) + seen[key] = true + } + return cmps +} + +func (txn *txnLeasing) guardRanges(ops []v3.Op) (cmps []v3.Cmp, err error) { + for _, op := range ops { + if op.IsGet() || len(op.RangeBytes()) == 0 { + continue + } + + key, end := string(op.KeyBytes()), string(op.RangeBytes()) + maxRevLK, err := txn.lkv.revokeRange(txn.ctx, key, end) + if err != nil { + return nil, err + } + + opts := append(v3.WithLastRev(), v3.WithRange(end)) + getResp, err := txn.lkv.kv.Get(txn.ctx, key, opts...) 
+ if err != nil { + return nil, err + } + maxModRev := int64(0) + if len(getResp.Kvs) > 0 { + maxModRev = getResp.Kvs[0].ModRevision + } + + noKeyUpdate := v3.Compare(v3.ModRevision(key).WithRange(end), "<", maxModRev+1) + noLeaseUpdate := v3.Compare( + v3.CreateRevision(txn.lkv.pfx+key).WithRange(txn.lkv.pfx+end), + "<", + maxRevLK+1) + cmps = append(cmps, noKeyUpdate, noLeaseUpdate) + } + return cmps, nil +} + +func (txn *txnLeasing) guard(ops []v3.Op) ([]v3.Cmp, error) { + cmps := txn.guardKeys(ops) + rangeCmps, err := txn.guardRanges(ops) + return append(cmps, rangeCmps...), err +} + +func (txn *txnLeasing) commitToCache(txnResp *v3pb.TxnResponse, userTxn v3.Op) { + ops := gatherResponseOps(txnResp.Responses, []v3.Op{userTxn}) + txn.lkv.leases.mu.Lock() + for _, op := range ops { + key := string(op.KeyBytes()) + if op.IsDelete() && len(op.RangeBytes()) > 0 { + end := string(op.RangeBytes()) + for k := range txn.lkv.leases.entries { + if inRange(k, key, end) { + txn.lkv.leases.delete(k, txnResp.Header) + } + } + } else if op.IsDelete() { + txn.lkv.leases.delete(key, txnResp.Header) + } + if op.IsPut() { + txn.lkv.leases.Update(op.KeyBytes(), op.ValueBytes(), txnResp.Header) + } + } + txn.lkv.leases.mu.Unlock() +} + +func (txn *txnLeasing) revokeFallback(fbResps []*v3pb.ResponseOp) error { + for _, resp := range fbResps { + _, err := txn.lkv.revokeLeaseKvs(txn.ctx, resp.GetResponseRange().Kvs) + if err != nil { + return err + } + } + return nil +} + +func (txn *txnLeasing) serverTxn() (*v3.TxnResponse, error) { + if err := txn.lkv.waitSession(txn.ctx); err != nil { + return nil, err + } + + userOps := gatherOps(append(txn.opst, txn.opse...)) + userTxn := v3.OpTxn(txn.cs, txn.opst, txn.opse) + fbOps := txn.fallback(userOps) + + defer closeAll(txn.lkv.leases.LockWriteOps(userOps)) + for { + cmps, err := txn.guard(userOps) + if err != nil { + return nil, err + } + resp, err := txn.lkv.kv.Txn(txn.ctx).If(cmps...).Then(userTxn).Else(fbOps...).Commit() + if err != nil 
{ + for _, cmp := range cmps { + txn.lkv.leases.Evict(strings.TrimPrefix(string(cmp.Key), txn.lkv.pfx)) + } + return nil, err + } + if resp.Succeeded { + txn.commitToCache((*v3pb.TxnResponse)(resp), userTxn) + userResp := resp.Responses[0].GetResponseTxn() + userResp.Header = resp.Header + return (*v3.TxnResponse)(userResp), nil + } + if err := txn.revokeFallback(resp.Responses); err != nil { + return nil, err + } + } +} diff --git a/vendor/go.etcd.io/etcd/client/v3/leasing/util.go b/vendor/go.etcd.io/etcd/client/v3/leasing/util.go new file mode 100644 index 0000000000..b6a520f03f --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/leasing/util.go @@ -0,0 +1,108 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package leasing + +import ( + "bytes" + + v3pb "go.etcd.io/etcd/api/v3/etcdserverpb" + v3 "go.etcd.io/etcd/client/v3" +) + +func compareInt64(a, b int64) int { + switch { + case a < b: + return -1 + case a > b: + return 1 + default: + return 0 + } +} + +func evalCmp(resp *v3.GetResponse, tcmp v3.Cmp) bool { + var result int + if len(resp.Kvs) != 0 { + kv := resp.Kvs[0] + switch tcmp.Target { + case v3pb.Compare_VALUE: + if tv, _ := tcmp.TargetUnion.(*v3pb.Compare_Value); tv != nil { + result = bytes.Compare(kv.Value, tv.Value) + } + case v3pb.Compare_CREATE: + if tv, _ := tcmp.TargetUnion.(*v3pb.Compare_CreateRevision); tv != nil { + result = compareInt64(kv.CreateRevision, tv.CreateRevision) + } + case v3pb.Compare_MOD: + if tv, _ := tcmp.TargetUnion.(*v3pb.Compare_ModRevision); tv != nil { + result = compareInt64(kv.ModRevision, tv.ModRevision) + } + case v3pb.Compare_VERSION: + if tv, _ := tcmp.TargetUnion.(*v3pb.Compare_Version); tv != nil { + result = compareInt64(kv.Version, tv.Version) + } + } + } + switch tcmp.Result { + case v3pb.Compare_EQUAL: + return result == 0 + case v3pb.Compare_NOT_EQUAL: + return result != 0 + case v3pb.Compare_GREATER: + return result > 0 + case v3pb.Compare_LESS: + return result < 0 + } + return true +} + +func gatherOps(ops []v3.Op) (ret []v3.Op) { + for _, op := range ops { + if !op.IsTxn() { + ret = append(ret, op) + continue + } + _, thenOps, elseOps := op.Txn() + ret = append(ret, gatherOps(append(thenOps, elseOps...))...) + } + return ret +} + +func gatherResponseOps(resp []*v3pb.ResponseOp, ops []v3.Op) (ret []v3.Op) { + for i, op := range ops { + if !op.IsTxn() { + ret = append(ret, op) + continue + } + _, thenOps, elseOps := op.Txn() + if txnResp := resp[i].GetResponseTxn(); txnResp.Succeeded { + ret = append(ret, gatherResponseOps(txnResp.Responses, thenOps)...) + } else { + ret = append(ret, gatherResponseOps(txnResp.Responses, elseOps)...) 
+ } + } + return ret +} + +func copyHeader(hdr *v3pb.ResponseHeader) *v3pb.ResponseHeader { + h := *hdr + return &h +} + +func closeAll(chs []chan<- struct{}) { + for _, ch := range chs { + close(ch) + } +} diff --git a/vendor/go.etcd.io/etcd/client/v3/namespace/doc.go b/vendor/go.etcd.io/etcd/client/v3/namespace/doc.go new file mode 100644 index 0000000000..01849b150a --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/namespace/doc.go @@ -0,0 +1,43 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package namespace is a clientv3 wrapper that translates all keys to begin +// with a given prefix. +// +// First, create a client: +// +// cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}}) +// if err != nil { +// // handle error! 
+// } +// +// Next, override the client interfaces: +// +// unprefixedKV := cli.KV +// cli.KV = namespace.NewKV(cli.KV, "my-prefix/") +// cli.Watcher = namespace.NewWatcher(cli.Watcher, "my-prefix/") +// cli.Lease = namespace.NewLease(cli.Lease, "my-prefix/") +// +// Now calls using 'cli' will namespace / prefix all keys with "my-prefix/": +// +// cli.Put(context.TODO(), "abc", "123") +// resp, _ := unprefixedKV.Get(context.TODO(), "my-prefix/abc") +// fmt.Printf("%s\n", resp.Kvs[0].Value) +// // Output: 123 +// unprefixedKV.Put(context.TODO(), "my-prefix/abc", "456") +// resp, _ = cli.Get(context.TODO(), "abc") +// fmt.Printf("%s\n", resp.Kvs[0].Value) +// // Output: 456 +// +package namespace diff --git a/vendor/go.etcd.io/etcd/client/v3/namespace/kv.go b/vendor/go.etcd.io/etcd/client/v3/namespace/kv.go new file mode 100644 index 0000000000..f745225cac --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/namespace/kv.go @@ -0,0 +1,206 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package namespace + +import ( + "context" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + "go.etcd.io/etcd/client/v3" +) + +type kvPrefix struct { + clientv3.KV + pfx string +} + +// NewKV wraps a KV instance so that all requests +// are prefixed with a given string. 
+func NewKV(kv clientv3.KV, prefix string) clientv3.KV { + return &kvPrefix{kv, prefix} +} + +func (kv *kvPrefix) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (*clientv3.PutResponse, error) { + if len(key) == 0 { + return nil, rpctypes.ErrEmptyKey + } + op := kv.prefixOp(clientv3.OpPut(key, val, opts...)) + r, err := kv.KV.Do(ctx, op) + if err != nil { + return nil, err + } + put := r.Put() + kv.unprefixPutResponse(put) + return put, nil +} + +func (kv *kvPrefix) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) { + if len(key) == 0 && !(clientv3.IsOptsWithFromKey(opts) || clientv3.IsOptsWithPrefix(opts)) { + return nil, rpctypes.ErrEmptyKey + } + r, err := kv.KV.Do(ctx, kv.prefixOp(clientv3.OpGet(key, opts...))) + if err != nil { + return nil, err + } + get := r.Get() + kv.unprefixGetResponse(get) + return get, nil +} + +func (kv *kvPrefix) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.DeleteResponse, error) { + if len(key) == 0 && !(clientv3.IsOptsWithFromKey(opts) || clientv3.IsOptsWithPrefix(opts)) { + return nil, rpctypes.ErrEmptyKey + } + r, err := kv.KV.Do(ctx, kv.prefixOp(clientv3.OpDelete(key, opts...))) + if err != nil { + return nil, err + } + del := r.Del() + kv.unprefixDeleteResponse(del) + return del, nil +} + +func (kv *kvPrefix) Do(ctx context.Context, op clientv3.Op) (clientv3.OpResponse, error) { + if len(op.KeyBytes()) == 0 && !op.IsTxn() { + return clientv3.OpResponse{}, rpctypes.ErrEmptyKey + } + r, err := kv.KV.Do(ctx, kv.prefixOp(op)) + if err != nil { + return r, err + } + switch { + case r.Get() != nil: + kv.unprefixGetResponse(r.Get()) + case r.Put() != nil: + kv.unprefixPutResponse(r.Put()) + case r.Del() != nil: + kv.unprefixDeleteResponse(r.Del()) + case r.Txn() != nil: + kv.unprefixTxnResponse(r.Txn()) + } + return r, nil +} + +type txnPrefix struct { + clientv3.Txn + kv *kvPrefix +} + +func (kv *kvPrefix) Txn(ctx context.Context) 
clientv3.Txn { + return &txnPrefix{kv.KV.Txn(ctx), kv} +} + +func (txn *txnPrefix) If(cs ...clientv3.Cmp) clientv3.Txn { + txn.Txn = txn.Txn.If(txn.kv.prefixCmps(cs)...) + return txn +} + +func (txn *txnPrefix) Then(ops ...clientv3.Op) clientv3.Txn { + txn.Txn = txn.Txn.Then(txn.kv.prefixOps(ops)...) + return txn +} + +func (txn *txnPrefix) Else(ops ...clientv3.Op) clientv3.Txn { + txn.Txn = txn.Txn.Else(txn.kv.prefixOps(ops)...) + return txn +} + +func (txn *txnPrefix) Commit() (*clientv3.TxnResponse, error) { + resp, err := txn.Txn.Commit() + if err != nil { + return nil, err + } + txn.kv.unprefixTxnResponse(resp) + return resp, nil +} + +func (kv *kvPrefix) prefixOp(op clientv3.Op) clientv3.Op { + if !op.IsTxn() { + begin, end := kv.prefixInterval(op.KeyBytes(), op.RangeBytes()) + op.WithKeyBytes(begin) + op.WithRangeBytes(end) + return op + } + cmps, thenOps, elseOps := op.Txn() + return clientv3.OpTxn(kv.prefixCmps(cmps), kv.prefixOps(thenOps), kv.prefixOps(elseOps)) +} + +func (kv *kvPrefix) unprefixGetResponse(resp *clientv3.GetResponse) { + for i := range resp.Kvs { + resp.Kvs[i].Key = resp.Kvs[i].Key[len(kv.pfx):] + } +} + +func (kv *kvPrefix) unprefixPutResponse(resp *clientv3.PutResponse) { + if resp.PrevKv != nil { + resp.PrevKv.Key = resp.PrevKv.Key[len(kv.pfx):] + } +} + +func (kv *kvPrefix) unprefixDeleteResponse(resp *clientv3.DeleteResponse) { + for i := range resp.PrevKvs { + resp.PrevKvs[i].Key = resp.PrevKvs[i].Key[len(kv.pfx):] + } +} + +func (kv *kvPrefix) unprefixTxnResponse(resp *clientv3.TxnResponse) { + for _, r := range resp.Responses { + switch tv := r.Response.(type) { + case *pb.ResponseOp_ResponseRange: + if tv.ResponseRange != nil { + kv.unprefixGetResponse((*clientv3.GetResponse)(tv.ResponseRange)) + } + case *pb.ResponseOp_ResponsePut: + if tv.ResponsePut != nil { + kv.unprefixPutResponse((*clientv3.PutResponse)(tv.ResponsePut)) + } + case *pb.ResponseOp_ResponseDeleteRange: + if tv.ResponseDeleteRange != nil { + 
kv.unprefixDeleteResponse((*clientv3.DeleteResponse)(tv.ResponseDeleteRange)) + } + case *pb.ResponseOp_ResponseTxn: + if tv.ResponseTxn != nil { + kv.unprefixTxnResponse((*clientv3.TxnResponse)(tv.ResponseTxn)) + } + default: + } + } +} + +func (kv *kvPrefix) prefixInterval(key, end []byte) (pfxKey []byte, pfxEnd []byte) { + return prefixInterval(kv.pfx, key, end) +} + +func (kv *kvPrefix) prefixCmps(cs []clientv3.Cmp) []clientv3.Cmp { + newCmps := make([]clientv3.Cmp, len(cs)) + for i := range cs { + newCmps[i] = cs[i] + pfxKey, endKey := kv.prefixInterval(cs[i].KeyBytes(), cs[i].RangeEnd) + newCmps[i].WithKeyBytes(pfxKey) + if len(cs[i].RangeEnd) != 0 { + newCmps[i].RangeEnd = endKey + } + } + return newCmps +} + +func (kv *kvPrefix) prefixOps(ops []clientv3.Op) []clientv3.Op { + newOps := make([]clientv3.Op, len(ops)) + for i := range ops { + newOps[i] = kv.prefixOp(ops[i]) + } + return newOps +} diff --git a/vendor/go.etcd.io/etcd/client/v3/namespace/lease.go b/vendor/go.etcd.io/etcd/client/v3/namespace/lease.go new file mode 100644 index 0000000000..f274de5e99 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/namespace/lease.go @@ -0,0 +1,57 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package namespace + +import ( + "bytes" + "context" + + "go.etcd.io/etcd/client/v3" +) + +type leasePrefix struct { + clientv3.Lease + pfx []byte +} + +// NewLease wraps a Lease interface to filter for only keys with a prefix +// and remove that prefix when fetching attached keys through TimeToLive. +func NewLease(l clientv3.Lease, prefix string) clientv3.Lease { + return &leasePrefix{l, []byte(prefix)} +} + +func (l *leasePrefix) TimeToLive(ctx context.Context, id clientv3.LeaseID, opts ...clientv3.LeaseOption) (*clientv3.LeaseTimeToLiveResponse, error) { + resp, err := l.Lease.TimeToLive(ctx, id, opts...) + if err != nil { + return nil, err + } + if len(resp.Keys) > 0 { + var outKeys [][]byte + for i := range resp.Keys { + if len(resp.Keys[i]) < len(l.pfx) { + // too short + continue + } + if !bytes.Equal(resp.Keys[i][:len(l.pfx)], l.pfx) { + // doesn't match prefix + continue + } + // strip prefix + outKeys = append(outKeys, resp.Keys[i][len(l.pfx):]) + } + resp.Keys = outKeys + } + return resp, nil +} diff --git a/vendor/go.etcd.io/etcd/client/v3/namespace/util.go b/vendor/go.etcd.io/etcd/client/v3/namespace/util.go new file mode 100644 index 0000000000..ecf04046c3 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/namespace/util.go @@ -0,0 +1,42 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package namespace + +func prefixInterval(pfx string, key, end []byte) (pfxKey []byte, pfxEnd []byte) { + pfxKey = make([]byte, len(pfx)+len(key)) + copy(pfxKey[copy(pfxKey, pfx):], key) + + if len(end) == 1 && end[0] == 0 { + // the edge of the keyspace + pfxEnd = make([]byte, len(pfx)) + copy(pfxEnd, pfx) + ok := false + for i := len(pfxEnd) - 1; i >= 0; i-- { + if pfxEnd[i]++; pfxEnd[i] != 0 { + ok = true + break + } + } + if !ok { + // 0xff..ff => 0x00 + pfxEnd = []byte{0} + } + } else if len(end) >= 1 { + pfxEnd = make([]byte, len(pfx)+len(end)) + copy(pfxEnd[copy(pfxEnd, pfx):], end) + } + + return pfxKey, pfxEnd +} diff --git a/vendor/go.etcd.io/etcd/client/v3/namespace/watch.go b/vendor/go.etcd.io/etcd/client/v3/namespace/watch.go new file mode 100644 index 0000000000..12362856d0 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/namespace/watch.go @@ -0,0 +1,83 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package namespace + +import ( + "context" + "sync" + + "go.etcd.io/etcd/client/v3" +) + +type watcherPrefix struct { + clientv3.Watcher + pfx string + + wg sync.WaitGroup + stopc chan struct{} + stopOnce sync.Once +} + +// NewWatcher wraps a Watcher instance so that all Watch requests +// are prefixed with a given string and all Watch responses have +// the prefix removed. 
+func NewWatcher(w clientv3.Watcher, prefix string) clientv3.Watcher { + return &watcherPrefix{Watcher: w, pfx: prefix, stopc: make(chan struct{})} +} + +func (w *watcherPrefix) Watch(ctx context.Context, key string, opts ...clientv3.OpOption) clientv3.WatchChan { + // since OpOption is opaque, determine range for prefixing through an OpGet + op := clientv3.OpGet(key, opts...) + end := op.RangeBytes() + pfxBegin, pfxEnd := prefixInterval(w.pfx, []byte(key), end) + if pfxEnd != nil { + opts = append(opts, clientv3.WithRange(string(pfxEnd))) + } + + wch := w.Watcher.Watch(ctx, string(pfxBegin), opts...) + + // translate watch events from prefixed to unprefixed + pfxWch := make(chan clientv3.WatchResponse) + w.wg.Add(1) + go func() { + defer func() { + close(pfxWch) + w.wg.Done() + }() + for wr := range wch { + for i := range wr.Events { + wr.Events[i].Kv.Key = wr.Events[i].Kv.Key[len(w.pfx):] + if wr.Events[i].PrevKv != nil { + wr.Events[i].PrevKv.Key = wr.Events[i].Kv.Key + } + } + select { + case pfxWch <- wr: + case <-ctx.Done(): + return + case <-w.stopc: + return + } + } + }() + return pfxWch +} + +func (w *watcherPrefix) Close() error { + err := w.Watcher.Close() + w.stopOnce.Do(func() { close(w.stopc) }) + w.wg.Wait() + return err +} diff --git a/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints.go b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints.go new file mode 100644 index 0000000000..72bd227874 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints.go @@ -0,0 +1,82 @@ +package endpoints + +import ( + "context" + + clientv3 "go.etcd.io/etcd/client/v3" +) + +// Endpoint represents a single address the connection can be established with. +// +// Inspired by: https://pkg.go.dev/google.golang.org/grpc/resolver#Address. +// Please document etcd version since which version each field is supported. +type Endpoint struct { + // Addr is the server address on which a connection will be established. 
+ // Since etcd 3.1 + Addr string + + // Metadata is the information associated with Addr, which may be used + // to make load balancing decision. + // Since etcd 3.1 + Metadata interface{} +} + +type Operation uint8 + +const ( + // Add indicates an Endpoint is added. + Add Operation = iota + // Delete indicates an existing address is deleted. + Delete +) + +// Update describes a single edit action of an Endpoint. +type Update struct { + // Op - action Add or Delete. + Op Operation + Key string + Endpoint Endpoint +} + +// WatchChannel is used to deliver notifications about endpoints updates. +type WatchChannel <-chan []*Update + +// Key2EndpointMap maps etcd key into struct describing the endpoint. +type Key2EndpointMap map[string]Endpoint + +// UpdateWithOpts describes endpoint update (add or delete) together +// with etcd options (e.g. to attach an endpoint to a lease). +type UpdateWithOpts struct { + Update + Opts []clientv3.OpOption +} + +// NewAddUpdateOpts constructs UpdateWithOpts for endpoint registration. +func NewAddUpdateOpts(key string, endpoint Endpoint, opts ...clientv3.OpOption) *UpdateWithOpts { + return &UpdateWithOpts{Update: Update{Op: Add, Key: key, Endpoint: endpoint}, Opts: opts} +} + +// NewDeleteUpdateOpts constructs UpdateWithOpts for endpoint deletion. +func NewDeleteUpdateOpts(key string, opts ...clientv3.OpOption) *UpdateWithOpts { + return &UpdateWithOpts{Update: Update{Op: Delete, Key: key}, Opts: opts} +} + +// Manager can be used to add/remove & inspect endpoints stored in etcd for +// a particular target. +type Manager interface { + // Update allows to atomically add/remove a few endpoints from etcd. + Update(ctx context.Context, updates []*UpdateWithOpts) error + + // AddEndpoint registers a single endpoint in etcd. + // For more advanced use-cases use the Update method. + AddEndpoint(ctx context.Context, key string, endpoint Endpoint, opts ...clientv3.OpOption) error + // DeleteEndpoint deletes a single endpoint stored in etcd. 
+ // For more advanced use-cases use the Update method. + DeleteEndpoint(ctx context.Context, key string, opts ...clientv3.OpOption) error + + // List returns all the endpoints for the current target as a map. + List(ctx context.Context) (Key2EndpointMap, error) + // NewWatchChannel creates a channel that populates or endpoint updates. + // Cancel the 'ctx' to close the watcher. + NewWatchChannel(ctx context.Context) (WatchChannel, error) +} diff --git a/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints_impl.go b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints_impl.go new file mode 100644 index 0000000000..37f04803e1 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/endpoints_impl.go @@ -0,0 +1,175 @@ +package endpoints + +// TODO: The API is not yet implemented. + +import ( + "context" + "encoding/json" + "errors" + "strings" + + clientv3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/client/v3/naming/endpoints/internal" + + "go.uber.org/zap" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type endpointManager struct { + // Client is an initialized etcd client. + client *clientv3.Client + target string +} + +// NewManager creates an endpoint manager which implements the interface of 'Manager'. 
+func NewManager(client *clientv3.Client, target string) (Manager, error) { + if client == nil { + return nil, errors.New("invalid etcd client") + } + + if target == "" { + return nil, errors.New("invalid target") + } + + em := &endpointManager{ + client: client, + target: target, + } + return em, nil +} + +func (m *endpointManager) Update(ctx context.Context, updates []*UpdateWithOpts) (err error) { + ops := make([]clientv3.Op, 0, len(updates)) + for _, update := range updates { + if !strings.HasPrefix(update.Key, m.target+"/") { + return status.Errorf(codes.InvalidArgument, "endpoints: endpoint key should be prefixed with '%s/' got: '%s'", m.target, update.Key) + } + + switch update.Op { + case Add: + internalUpdate := &internal.Update{ + Op: internal.Add, + Addr: update.Endpoint.Addr, + Metadata: update.Endpoint.Metadata, + } + + var v []byte + if v, err = json.Marshal(internalUpdate); err != nil { + return status.Error(codes.InvalidArgument, err.Error()) + } + ops = append(ops, clientv3.OpPut(update.Key, string(v), update.Opts...)) + case Delete: + ops = append(ops, clientv3.OpDelete(update.Key, update.Opts...)) + default: + return status.Error(codes.InvalidArgument, "endpoints: bad update op") + } + } + _, err = m.client.KV.Txn(ctx).Then(ops...).Commit() + return err +} + +func (m *endpointManager) AddEndpoint(ctx context.Context, key string, endpoint Endpoint, opts ...clientv3.OpOption) error { + return m.Update(ctx, []*UpdateWithOpts{NewAddUpdateOpts(key, endpoint, opts...)}) +} + +func (m *endpointManager) DeleteEndpoint(ctx context.Context, key string, opts ...clientv3.OpOption) error { + return m.Update(ctx, []*UpdateWithOpts{NewDeleteUpdateOpts(key, opts...)}) +} + +func (m *endpointManager) NewWatchChannel(ctx context.Context) (WatchChannel, error) { + resp, err := m.client.Get(ctx, m.target, clientv3.WithPrefix(), clientv3.WithSerializable()) + if err != nil { + return nil, err + } + + lg := m.client.GetLogger() + initUpdates := make([]*Update, 0, 
len(resp.Kvs)) + for _, kv := range resp.Kvs { + var iup internal.Update + if err := json.Unmarshal(kv.Value, &iup); err != nil { + lg.Warn("unmarshal endpoint update failed", zap.String("key", string(kv.Key)), zap.Error(err)) + continue + } + up := &Update{ + Op: Add, + Key: string(kv.Key), + Endpoint: Endpoint{Addr: iup.Addr, Metadata: iup.Metadata}, + } + initUpdates = append(initUpdates, up) + } + + upch := make(chan []*Update, 1) + if len(initUpdates) > 0 { + upch <- initUpdates + } + go m.watch(ctx, resp.Header.Revision+1, upch) + return upch, nil +} + +func (m *endpointManager) watch(ctx context.Context, rev int64, upch chan []*Update) { + defer close(upch) + + lg := m.client.GetLogger() + opts := []clientv3.OpOption{clientv3.WithRev(rev), clientv3.WithPrefix()} + wch := m.client.Watch(ctx, m.target, opts...) + for { + select { + case <-ctx.Done(): + return + case wresp, ok := <-wch: + if !ok { + lg.Warn("watch closed", zap.String("target", m.target)) + return + } + if wresp.Err() != nil { + lg.Warn("watch failed", zap.String("target", m.target), zap.Error(wresp.Err())) + return + } + + deltaUps := make([]*Update, 0, len(wresp.Events)) + for _, e := range wresp.Events { + var iup internal.Update + var err error + var op Operation + switch e.Type { + case clientv3.EventTypePut: + err = json.Unmarshal(e.Kv.Value, &iup) + op = Add + if err != nil { + lg.Warn("unmarshal endpoint update failed", zap.String("key", string(e.Kv.Key)), zap.Error(err)) + continue + } + case clientv3.EventTypeDelete: + iup = internal.Update{Op: internal.Delete} + op = Delete + default: + continue + } + up := &Update{Op: op, Key: string(e.Kv.Key), Endpoint: Endpoint{Addr: iup.Addr, Metadata: iup.Metadata}} + deltaUps = append(deltaUps, up) + } + if len(deltaUps) > 0 { + upch <- deltaUps + } + } + } +} + +func (m *endpointManager) List(ctx context.Context) (Key2EndpointMap, error) { + resp, err := m.client.Get(ctx, m.target, clientv3.WithPrefix(), clientv3.WithSerializable()) + if err != 
nil { + return nil, err + } + + eps := make(Key2EndpointMap) + for _, kv := range resp.Kvs { + var iup internal.Update + if err := json.Unmarshal(kv.Value, &iup); err != nil { + continue + } + + eps[string(kv.Key)] = Endpoint{Addr: iup.Addr, Metadata: iup.Metadata} + } + return eps, nil +} diff --git a/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/internal/update.go b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/internal/update.go new file mode 100644 index 0000000000..71aa83fed4 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/naming/endpoints/internal/update.go @@ -0,0 +1,38 @@ +package internal + +// Operation describes action performed on endpoint (addition vs deletion). +// Must stay JSON-format compatible with: +// https://pkg.go.dev/google.golang.org/grpc@v1.29.1/naming#Operation +type Operation uint8 + +const ( + // Add indicates a new address is added. + Add Operation = iota + // Delete indicates an existing address is deleted. + Delete +) + +// Update defines a persistent (JSON marshalled) format representing +// endpoint within the etcd storage. +// +// As the format can be persisted by one version of etcd client library and +// read by other the format must be kept backward compatible and +// in particular must be superset of the grpc(<=1.29.1) naming.Update structure: +// https://pkg.go.dev/google.golang.org/grpc@v1.29.1/naming#Update +// +// Please document since which version of etcd-client given property is supported. +// Please keep the naming consistent with e.g. https://pkg.go.dev/google.golang.org/grpc/resolver#Address. +// +// Notice that it is not valid having both empty string Addr and nil Metadata in an Update. +type Update struct { + // Op indicates the operation of the update. + // Since etcd 3.1. + Op Operation + // Addr is the updated address. It is empty string if there is no address update. + // Since etcd 3.1. + Addr string + // Metadata is the updated metadata. It is nil if there is no metadata update. 
+ // Metadata is not required for a custom naming implementation. + // Since etcd 3.1. + Metadata interface{} +} diff --git a/vendor/go.etcd.io/etcd/client/v3/ordering/doc.go b/vendor/go.etcd.io/etcd/client/v3/ordering/doc.go new file mode 100644 index 0000000000..856f330580 --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/ordering/doc.go @@ -0,0 +1,42 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ordering is a clientv3 wrapper that caches response header revisions +// to detect ordering violations from stale responses. Users may define a +// policy on how to handle the ordering violation, but typically the client +// should connect to another endpoint and reissue the request. +// +// The most common situation where an ordering violation happens is a client +// reconnects to a partitioned member and issues a serializable read. Since the +// partitioned member is likely behind the last member, it may return a Get +// response based on a store revision older than the store revision used to +// service a prior Get on the former endpoint. +// +// First, create a client: +// +// cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}}) +// if err != nil { +// // handle error! 
+// } +// +// Next, override the client interface with the ordering wrapper: +// +// vf := func(op clientv3.Op, resp clientv3.OpResponse, prevRev int64) error { +// return fmt.Errorf("ordering: issued %+v, got %+v, expected rev=%v", op, resp, prevRev) +// } +// cli.KV = ordering.NewKV(cli.KV, vf) +// +// Now calls using 'cli' will reject order violations with an error. +// +package ordering diff --git a/vendor/go.etcd.io/etcd/client/v3/ordering/kv.go b/vendor/go.etcd.io/etcd/client/v3/ordering/kv.go new file mode 100644 index 0000000000..7914fc4b9c --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/ordering/kv.go @@ -0,0 +1,149 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ordering + +import ( + "context" + "sync" + + "go.etcd.io/etcd/client/v3" +) + +// kvOrdering ensures that serialized requests do not return +// get with revisions less than the previous +// returned revision. 
+type kvOrdering struct { + clientv3.KV + orderViolationFunc OrderViolationFunc + prevRev int64 + revMu sync.RWMutex +} + +func NewKV(kv clientv3.KV, orderViolationFunc OrderViolationFunc) *kvOrdering { + return &kvOrdering{kv, orderViolationFunc, 0, sync.RWMutex{}} +} + +func (kv *kvOrdering) getPrevRev() int64 { + kv.revMu.RLock() + defer kv.revMu.RUnlock() + return kv.prevRev +} + +func (kv *kvOrdering) setPrevRev(currRev int64) { + kv.revMu.Lock() + defer kv.revMu.Unlock() + if currRev > kv.prevRev { + kv.prevRev = currRev + } +} + +func (kv *kvOrdering) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) { + // prevRev is stored in a local variable in order to record the prevRev + // at the beginning of the Get operation, because concurrent + // access to kvOrdering could change the prevRev field in the + // middle of the Get operation. + prevRev := kv.getPrevRev() + op := clientv3.OpGet(key, opts...) + for { + r, err := kv.KV.Do(ctx, op) + if err != nil { + return nil, err + } + resp := r.Get() + if resp.Header.Revision == prevRev { + return resp, nil + } else if resp.Header.Revision > prevRev { + kv.setPrevRev(resp.Header.Revision) + return resp, nil + } + err = kv.orderViolationFunc(op, r, prevRev) + if err != nil { + return nil, err + } + } +} + +func (kv *kvOrdering) Txn(ctx context.Context) clientv3.Txn { + return &txnOrdering{ + kv.KV.Txn(ctx), + kv, + ctx, + sync.Mutex{}, + []clientv3.Cmp{}, + []clientv3.Op{}, + []clientv3.Op{}, + } +} + +// txnOrdering ensures that serialized requests do not return +// txn responses with revisions less than the previous +// returned revision. +type txnOrdering struct { + clientv3.Txn + *kvOrdering + ctx context.Context + mu sync.Mutex + cmps []clientv3.Cmp + thenOps []clientv3.Op + elseOps []clientv3.Op +} + +func (txn *txnOrdering) If(cs ...clientv3.Cmp) clientv3.Txn { + txn.mu.Lock() + defer txn.mu.Unlock() + txn.cmps = cs + txn.Txn.If(cs...) 
+ return txn +} + +func (txn *txnOrdering) Then(ops ...clientv3.Op) clientv3.Txn { + txn.mu.Lock() + defer txn.mu.Unlock() + txn.thenOps = ops + txn.Txn.Then(ops...) + return txn +} + +func (txn *txnOrdering) Else(ops ...clientv3.Op) clientv3.Txn { + txn.mu.Lock() + defer txn.mu.Unlock() + txn.elseOps = ops + txn.Txn.Else(ops...) + return txn +} + +func (txn *txnOrdering) Commit() (*clientv3.TxnResponse, error) { + // prevRev is stored in a local variable in order to record the prevRev + // at the beginning of the Commit operation, because concurrent + // access to txnOrdering could change the prevRev field in the + // middle of the Commit operation. + prevRev := txn.getPrevRev() + opTxn := clientv3.OpTxn(txn.cmps, txn.thenOps, txn.elseOps) + for { + opResp, err := txn.KV.Do(txn.ctx, opTxn) + if err != nil { + return nil, err + } + txnResp := opResp.Txn() + if txnResp.Header.Revision >= prevRev { + txn.setPrevRev(txnResp.Header.Revision) + return txnResp, nil + } + err = txn.orderViolationFunc(opTxn, opResp, prevRev) + if err != nil { + return nil, err + } + } +} diff --git a/vendor/go.etcd.io/etcd/client/v3/ordering/util.go b/vendor/go.etcd.io/etcd/client/v3/ordering/util.go new file mode 100644 index 0000000000..f8f65c4c9b --- /dev/null +++ b/vendor/go.etcd.io/etcd/client/v3/ordering/util.go @@ -0,0 +1,42 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ordering + +import ( + "errors" + "sync/atomic" + + "go.etcd.io/etcd/client/v3" +) + +type OrderViolationFunc func(op clientv3.Op, resp clientv3.OpResponse, prevRev int64) error + +var ErrNoGreaterRev = errors.New("etcdclient: no cluster members have a revision higher than the previously received revision") + +func NewOrderViolationSwitchEndpointClosure(c *clientv3.Client) OrderViolationFunc { + violationCount := int32(0) + return func(_ clientv3.Op, _ clientv3.OpResponse, _ int64) error { + // Each request is assigned by round-robin load-balancer's picker to a different + // endpoints. If we cycled them 5 times (even with some level of concurrency), + // with high probability no endpoint points on a member with fresh data. + // TODO: Ideally we should track members (resp.opp.Header) that returned + // stale result and explicitly temporarily disable them in 'picker'. + if atomic.LoadInt32(&violationCount) > int32(5*len(c.Endpoints())) { + return ErrNoGreaterRev + } + atomic.AddInt32(&violationCount, 1) + return nil + } +} diff --git a/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_unix.go b/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_unix.go new file mode 100644 index 0000000000..78161d4961 --- /dev/null +++ b/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_unix.go @@ -0,0 +1,85 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !windows && !plan9 +// +build !windows,!plan9 + +package osutil + +import ( + "os" + "os/signal" + "sync" + "syscall" + + "go.uber.org/zap" +) + +// InterruptHandler is a function that is called on receiving a +// SIGTERM or SIGINT signal. +type InterruptHandler func() + +var ( + interruptRegisterMu, interruptExitMu sync.Mutex + // interruptHandlers holds all registered InterruptHandlers in order + // they will be executed. + interruptHandlers = []InterruptHandler{} +) + +// RegisterInterruptHandler registers a new InterruptHandler. Handlers registered +// after interrupt handing was initiated will not be executed. +func RegisterInterruptHandler(h InterruptHandler) { + interruptRegisterMu.Lock() + defer interruptRegisterMu.Unlock() + interruptHandlers = append(interruptHandlers, h) +} + +// HandleInterrupts calls the handler functions on receiving a SIGINT or SIGTERM. +func HandleInterrupts(lg *zap.Logger) { + notifier := make(chan os.Signal, 1) + signal.Notify(notifier, syscall.SIGINT, syscall.SIGTERM) + + go func() { + sig := <-notifier + + interruptRegisterMu.Lock() + ihs := make([]InterruptHandler, len(interruptHandlers)) + copy(ihs, interruptHandlers) + interruptRegisterMu.Unlock() + + interruptExitMu.Lock() + + if lg != nil { + lg.Info("received signal; shutting down", zap.String("signal", sig.String())) + } + + for _, h := range ihs { + h() + } + signal.Stop(notifier) + pid := syscall.Getpid() + // exit directly if it is the "init" process, since the kernel will not help to kill pid 1. + if pid == 1 { + os.Exit(0) + } + setDflSignal(sig.(syscall.Signal)) + syscall.Kill(pid, sig.(syscall.Signal)) + }() +} + +// Exit relays to os.Exit if no interrupt handlers are running, blocks otherwise. 
+func Exit(code int) { + interruptExitMu.Lock() + os.Exit(code) +} diff --git a/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_windows.go b/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_windows.go new file mode 100644 index 0000000000..7572690d2d --- /dev/null +++ b/vendor/go.etcd.io/etcd/pkg/v3/osutil/interrupt_windows.go @@ -0,0 +1,37 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows +// +build windows + +package osutil + +import ( + "os" + + "go.uber.org/zap" +) + +type InterruptHandler func() + +// RegisterInterruptHandler is a no-op on windows +func RegisterInterruptHandler(h InterruptHandler) {} + +// HandleInterrupts is a no-op on windows +func HandleInterrupts(*zap.Logger) {} + +// Exit calls os.Exit +func Exit(code int) { + os.Exit(code) +} diff --git a/vendor/go.etcd.io/etcd/pkg/v3/osutil/osutil.go b/vendor/go.etcd.io/etcd/pkg/v3/osutil/osutil.go new file mode 100644 index 0000000000..cbf96e2e04 --- /dev/null +++ b/vendor/go.etcd.io/etcd/pkg/v3/osutil/osutil.go @@ -0,0 +1,41 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package osutil implements operating system-related utility functions. +package osutil + +import ( + "os" + "strings" +) + +var ( + // support to override setting SIG_DFL so tests don't terminate early + setDflSignal = dflSignal +) + +func Unsetenv(key string) error { + envs := os.Environ() + os.Clearenv() + for _, e := range envs { + strs := strings.SplitN(e, "=", 2) + if strs[0] == key { + continue + } + if err := os.Setenv(strs[0], strs[1]); err != nil { + return err + } + } + return nil +} diff --git a/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal.go b/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal.go new file mode 100644 index 0000000000..c324ea16ec --- /dev/null +++ b/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal.go @@ -0,0 +1,22 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !linux || cov +// +build !linux cov + +package osutil + +import "syscall" + +func dflSignal(sig syscall.Signal) { /* nop */ } diff --git a/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal_linux.go b/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal_linux.go new file mode 100644 index 0000000000..93e0f35080 --- /dev/null +++ b/vendor/go.etcd.io/etcd/pkg/v3/osutil/signal_linux.go @@ -0,0 +1,31 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build linux && !cov +// +build linux,!cov + +package osutil + +import ( + "syscall" + "unsafe" +) + +// dflSignal sets the given signal to SIG_DFL +func dflSignal(sig syscall.Signal) { + // clearing out the sigact sets the signal to SIG_DFL + var sigactBuf [32]uint64 + ptr := unsafe.Pointer(&sigactBuf) + syscall.Syscall6(uintptr(syscall.SYS_RT_SIGACTION), uintptr(sig), uintptr(ptr), 0, 8, 0, 0) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/config.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/config.go new file mode 100644 index 0000000000..26db0e67d6 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/config.go @@ -0,0 +1,471 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Every change should be reflected on help.go as well. + +package etcdmain + +import ( + "flag" + "fmt" + "io/ioutil" + "log" + "os" + "runtime" + + "go.etcd.io/etcd/api/v3/version" + "go.etcd.io/etcd/client/pkg/v3/logutil" + "go.etcd.io/etcd/pkg/v3/flags" + cconfig "go.etcd.io/etcd/server/v3/config" + "go.etcd.io/etcd/server/v3/embed" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" + "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" + + "go.uber.org/zap" + "sigs.k8s.io/yaml" +) + +var ( + proxyFlagOff = "off" + proxyFlagReadonly = "readonly" + proxyFlagOn = "on" + + fallbackFlagExit = "exit" + fallbackFlagProxy = "proxy" + + ignored = []string{ + "cluster-active-size", + "cluster-remove-delay", + "cluster-sync-interval", + "config", + "force", + "max-result-buffer", + "max-retry-attempts", + "peer-heartbeat-interval", + "peer-election-timeout", + "retry-interval", + "snapshot", + "v", + "vv", + // for coverage testing + "test.coverprofile", + "test.outputdir", + } +) + +type configProxy struct { + ProxyFailureWaitMs uint `json:"proxy-failure-wait"` + ProxyRefreshIntervalMs uint `json:"proxy-refresh-interval"` + ProxyDialTimeoutMs uint `json:"proxy-dial-timeout"` + ProxyWriteTimeoutMs uint `json:"proxy-write-timeout"` + ProxyReadTimeoutMs uint `json:"proxy-read-timeout"` + Fallback string + Proxy string + ProxyJSON string `json:"proxy"` + FallbackJSON string `json:"discovery-fallback"` +} + +// config holds the config for a command line invocation of etcd +type config struct { + ec embed.Config + cp configProxy + cf configFlags + 
configFile string + printVersion bool + ignored []string +} + +// configFlags has the set of flags used for command line parsing a Config +type configFlags struct { + flagSet *flag.FlagSet + clusterState *flags.SelectiveStringValue + fallback *flags.SelectiveStringValue + proxy *flags.SelectiveStringValue + v2deprecation *flags.SelectiveStringsValue +} + +func newConfig() *config { + cfg := &config{ + ec: *embed.NewConfig(), + cp: configProxy{ + Proxy: proxyFlagOff, + ProxyFailureWaitMs: 5000, + ProxyRefreshIntervalMs: 30000, + ProxyDialTimeoutMs: 1000, + ProxyWriteTimeoutMs: 5000, + }, + ignored: ignored, + } + cfg.cf = configFlags{ + flagSet: flag.NewFlagSet("etcd", flag.ContinueOnError), + clusterState: flags.NewSelectiveStringValue( + embed.ClusterStateFlagNew, + embed.ClusterStateFlagExisting, + ), + fallback: flags.NewSelectiveStringValue( + fallbackFlagProxy, + fallbackFlagExit, + ), + proxy: flags.NewSelectiveStringValue( + proxyFlagOff, + proxyFlagReadonly, + proxyFlagOn, + ), + v2deprecation: flags.NewSelectiveStringsValue( + string(cconfig.V2_DEPR_0_NOT_YET), + string(cconfig.V2_DEPR_1_WRITE_ONLY), + string(cconfig.V2_DEPR_1_WRITE_ONLY_DROP), + string(cconfig.V2_DEPR_2_GONE)), + } + + fs := cfg.cf.flagSet + fs.Usage = func() { + fmt.Fprintln(os.Stderr, usageline) + } + + fs.StringVar(&cfg.configFile, "config-file", "", "Path to the server configuration file. 
Note that if a configuration file is provided, other command line flags and environment variables will be ignored.") + + // member + fs.StringVar(&cfg.ec.Dir, "data-dir", cfg.ec.Dir, "Path to the data directory.") + fs.StringVar(&cfg.ec.WalDir, "wal-dir", cfg.ec.WalDir, "Path to the dedicated wal directory.") + fs.Var( + flags.NewUniqueURLsWithExceptions(embed.DefaultListenPeerURLs, ""), + "listen-peer-urls", + "List of URLs to listen on for peer traffic.", + ) + fs.Var( + flags.NewUniqueURLsWithExceptions(embed.DefaultListenClientURLs, ""), "listen-client-urls", + "List of URLs to listen on for client traffic.", + ) + fs.Var( + flags.NewUniqueURLsWithExceptions("", ""), + "listen-metrics-urls", + "List of URLs to listen on for the metrics and health endpoints.", + ) + fs.UintVar(&cfg.ec.MaxSnapFiles, "max-snapshots", cfg.ec.MaxSnapFiles, "Maximum number of snapshot files to retain (0 is unlimited).") + fs.UintVar(&cfg.ec.MaxWalFiles, "max-wals", cfg.ec.MaxWalFiles, "Maximum number of wal files to retain (0 is unlimited).") + fs.StringVar(&cfg.ec.Name, "name", cfg.ec.Name, "Human-readable name for this member.") + fs.Uint64Var(&cfg.ec.SnapshotCount, "snapshot-count", cfg.ec.SnapshotCount, "Number of committed transactions to trigger a snapshot to disk.") + fs.UintVar(&cfg.ec.TickMs, "heartbeat-interval", cfg.ec.TickMs, "Time (in milliseconds) of a heartbeat interval.") + fs.UintVar(&cfg.ec.ElectionMs, "election-timeout", cfg.ec.ElectionMs, "Time (in milliseconds) for an election to timeout.") + fs.BoolVar(&cfg.ec.InitialElectionTickAdvance, "initial-election-tick-advance", cfg.ec.InitialElectionTickAdvance, "Whether to fast-forward initial election ticks on boot for faster election.") + fs.Int64Var(&cfg.ec.QuotaBackendBytes, "quota-backend-bytes", cfg.ec.QuotaBackendBytes, "Raise alarms when backend size exceeds the given quota. 
0 means use the default quota.") + fs.StringVar(&cfg.ec.BackendFreelistType, "backend-bbolt-freelist-type", cfg.ec.BackendFreelistType, "BackendFreelistType specifies the type of freelist that boltdb backend uses(array and map are supported types)") + fs.DurationVar(&cfg.ec.BackendBatchInterval, "backend-batch-interval", cfg.ec.BackendBatchInterval, "BackendBatchInterval is the maximum time before commit the backend transaction.") + fs.IntVar(&cfg.ec.BackendBatchLimit, "backend-batch-limit", cfg.ec.BackendBatchLimit, "BackendBatchLimit is the maximum operations before commit the backend transaction.") + fs.UintVar(&cfg.ec.MaxTxnOps, "max-txn-ops", cfg.ec.MaxTxnOps, "Maximum number of operations permitted in a transaction.") + fs.UintVar(&cfg.ec.MaxRequestBytes, "max-request-bytes", cfg.ec.MaxRequestBytes, "Maximum client request size in bytes the server will accept.") + fs.DurationVar(&cfg.ec.GRPCKeepAliveMinTime, "grpc-keepalive-min-time", cfg.ec.GRPCKeepAliveMinTime, "Minimum interval duration that a client should wait before pinging server.") + fs.DurationVar(&cfg.ec.GRPCKeepAliveInterval, "grpc-keepalive-interval", cfg.ec.GRPCKeepAliveInterval, "Frequency duration of server-to-client ping to check if a connection is alive (0 to disable).") + fs.DurationVar(&cfg.ec.GRPCKeepAliveTimeout, "grpc-keepalive-timeout", cfg.ec.GRPCKeepAliveTimeout, "Additional duration of wait before closing a non-responsive connection (0 to disable).") + fs.BoolVar(&cfg.ec.SocketOpts.ReusePort, "socket-reuse-port", cfg.ec.SocketOpts.ReusePort, "Enable to set socket option SO_REUSEPORT on listeners allowing rebinding of a port already in use.") + fs.BoolVar(&cfg.ec.SocketOpts.ReuseAddress, "socket-reuse-address", cfg.ec.SocketOpts.ReuseAddress, "Enable to set socket option SO_REUSEADDR on listeners allowing binding to an address in `TIME_WAIT` state.") + + // raft connection timeouts + fs.DurationVar(&rafthttp.ConnReadTimeout, "raft-read-timeout", rafthttp.DefaultConnReadTimeout, "Read 
timeout set on each rafthttp connection") + fs.DurationVar(&rafthttp.ConnWriteTimeout, "raft-write-timeout", rafthttp.DefaultConnWriteTimeout, "Write timeout set on each rafthttp connection") + + // clustering + fs.Var( + flags.NewUniqueURLsWithExceptions(embed.DefaultInitialAdvertisePeerURLs, ""), + "initial-advertise-peer-urls", + "List of this member's peer URLs to advertise to the rest of the cluster.", + ) + fs.Var( + flags.NewUniqueURLsWithExceptions(embed.DefaultAdvertiseClientURLs, ""), + "advertise-client-urls", + "List of this member's client URLs to advertise to the public.", + ) + fs.StringVar(&cfg.ec.Durl, "discovery", cfg.ec.Durl, "Discovery URL used to bootstrap the cluster.") + fs.Var(cfg.cf.fallback, "discovery-fallback", fmt.Sprintf("Valid values include %q", cfg.cf.fallback.Valids())) + + fs.StringVar(&cfg.ec.Dproxy, "discovery-proxy", cfg.ec.Dproxy, "HTTP proxy to use for traffic to discovery service.") + fs.StringVar(&cfg.ec.DNSCluster, "discovery-srv", cfg.ec.DNSCluster, "DNS domain used to bootstrap initial cluster.") + fs.StringVar(&cfg.ec.DNSClusterServiceName, "discovery-srv-name", cfg.ec.DNSClusterServiceName, "Service name to query when using DNS discovery.") + fs.StringVar(&cfg.ec.InitialCluster, "initial-cluster", cfg.ec.InitialCluster, "Initial cluster configuration for bootstrapping.") + fs.StringVar(&cfg.ec.InitialClusterToken, "initial-cluster-token", cfg.ec.InitialClusterToken, "Initial cluster token for the etcd cluster during bootstrap.") + fs.Var(cfg.cf.clusterState, "initial-cluster-state", "Initial cluster state ('new' or 'existing').") + + fs.BoolVar(&cfg.ec.StrictReconfigCheck, "strict-reconfig-check", cfg.ec.StrictReconfigCheck, "Reject reconfiguration requests that would cause quorum loss.") + + fs.BoolVar(&cfg.ec.PreVote, "pre-vote", cfg.ec.PreVote, "Enable to run an additional Raft election phase.") + + fs.BoolVar(&cfg.ec.EnableV2, "enable-v2", cfg.ec.EnableV2, "Accept etcd V2 client requests. Deprecated in v3.5. 
Will be decommission in v3.6.") + fs.StringVar(&cfg.ec.ExperimentalEnableV2V3, "experimental-enable-v2v3", cfg.ec.ExperimentalEnableV2V3, "v3 prefix for serving emulated v2 state. Deprecated in 3.5. Will be decomissioned in 3.6.") + fs.Var(cfg.cf.v2deprecation, "v2-deprecation", fmt.Sprintf("v2store deprecation stage: %q. ", cfg.cf.proxy.Valids())) + + // proxy + fs.Var(cfg.cf.proxy, "proxy", fmt.Sprintf("Valid values include %q", cfg.cf.proxy.Valids())) + fs.UintVar(&cfg.cp.ProxyFailureWaitMs, "proxy-failure-wait", cfg.cp.ProxyFailureWaitMs, "Time (in milliseconds) an endpoint will be held in a failed state.") + fs.UintVar(&cfg.cp.ProxyRefreshIntervalMs, "proxy-refresh-interval", cfg.cp.ProxyRefreshIntervalMs, "Time (in milliseconds) of the endpoints refresh interval.") + fs.UintVar(&cfg.cp.ProxyDialTimeoutMs, "proxy-dial-timeout", cfg.cp.ProxyDialTimeoutMs, "Time (in milliseconds) for a dial to timeout.") + fs.UintVar(&cfg.cp.ProxyWriteTimeoutMs, "proxy-write-timeout", cfg.cp.ProxyWriteTimeoutMs, "Time (in milliseconds) for a write to timeout.") + fs.UintVar(&cfg.cp.ProxyReadTimeoutMs, "proxy-read-timeout", cfg.cp.ProxyReadTimeoutMs, "Time (in milliseconds) for a read to timeout.") + + // security + fs.StringVar(&cfg.ec.ClientTLSInfo.CertFile, "cert-file", "", "Path to the client server TLS cert file.") + fs.StringVar(&cfg.ec.ClientTLSInfo.KeyFile, "key-file", "", "Path to the client server TLS key file.") + fs.StringVar(&cfg.ec.ClientTLSInfo.ClientCertFile, "client-cert-file", "", "Path to an explicit peer client TLS cert file otherwise cert file will be used when client auth is required.") + fs.StringVar(&cfg.ec.ClientTLSInfo.ClientKeyFile, "client-key-file", "", "Path to an explicit peer client TLS key file otherwise key file will be used when client auth is required.") + fs.BoolVar(&cfg.ec.ClientTLSInfo.ClientCertAuth, "client-cert-auth", false, "Enable client cert authentication.") + fs.StringVar(&cfg.ec.ClientTLSInfo.CRLFile, "client-crl-file", "", "Path to 
the client certificate revocation list file.") + fs.StringVar(&cfg.ec.ClientTLSInfo.AllowedHostname, "client-cert-allowed-hostname", "", "Allowed TLS hostname for client cert authentication.") + fs.StringVar(&cfg.ec.ClientTLSInfo.TrustedCAFile, "trusted-ca-file", "", "Path to the client server TLS trusted CA cert file.") + fs.BoolVar(&cfg.ec.ClientAutoTLS, "auto-tls", false, "Client TLS using generated certificates") + fs.StringVar(&cfg.ec.PeerTLSInfo.CertFile, "peer-cert-file", "", "Path to the peer server TLS cert file.") + fs.StringVar(&cfg.ec.PeerTLSInfo.KeyFile, "peer-key-file", "", "Path to the peer server TLS key file.") + fs.StringVar(&cfg.ec.PeerTLSInfo.ClientCertFile, "peer-client-cert-file", "", "Path to an explicit peer client TLS cert file otherwise peer cert file will be used when client auth is required.") + fs.StringVar(&cfg.ec.PeerTLSInfo.ClientKeyFile, "peer-client-key-file", "", "Path to an explicit peer client TLS key file otherwise peer key file will be used when client auth is required.") + fs.BoolVar(&cfg.ec.PeerTLSInfo.ClientCertAuth, "peer-client-cert-auth", false, "Enable peer client cert authentication.") + fs.StringVar(&cfg.ec.PeerTLSInfo.TrustedCAFile, "peer-trusted-ca-file", "", "Path to the peer server TLS trusted CA file.") + fs.BoolVar(&cfg.ec.PeerAutoTLS, "peer-auto-tls", false, "Peer TLS using generated certificates") + fs.UintVar(&cfg.ec.SelfSignedCertValidity, "self-signed-cert-validity", 1, "The validity period of the client and peer certificates, unit is year") + fs.StringVar(&cfg.ec.PeerTLSInfo.CRLFile, "peer-crl-file", "", "Path to the peer certificate revocation list file.") + fs.StringVar(&cfg.ec.PeerTLSInfo.AllowedCN, "peer-cert-allowed-cn", "", "Allowed CN for inter peer authentication.") + fs.StringVar(&cfg.ec.PeerTLSInfo.AllowedHostname, "peer-cert-allowed-hostname", "", "Allowed TLS hostname for inter peer authentication.") + fs.Var(flags.NewStringsValue(""), "cipher-suites", "Comma-separated list of supported TLS 
cipher suites between client/server and peers (empty will be auto-populated by Go).") + fs.BoolVar(&cfg.ec.PeerTLSInfo.SkipClientSANVerify, "experimental-peer-skip-client-san-verification", false, "Skip verification of SAN field in client certificate for peer connections.") + + fs.Var( + flags.NewUniqueURLsWithExceptions("*", "*"), + "cors", + "Comma-separated white list of origins for CORS, or cross-origin resource sharing, (empty or * means allow all)", + ) + fs.Var(flags.NewUniqueStringsValue("*"), "host-whitelist", "Comma-separated acceptable hostnames from HTTP client requests, if server is not secure (empty means allow all).") + + // logging + fs.StringVar(&cfg.ec.Logger, "logger", "zap", "Currently only supports 'zap' for structured logging.") + fs.Var(flags.NewUniqueStringsValue(embed.DefaultLogOutput), "log-outputs", "Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd, or list of comma separated output targets.") + fs.StringVar(&cfg.ec.LogLevel, "log-level", logutil.DefaultLogLevel, "Configures log level. Only supports debug, info, warn, error, panic, or fatal. Default 'info'.") + fs.BoolVar(&cfg.ec.EnableLogRotation, "enable-log-rotation", false, "Enable log rotation of a single log-outputs file target.") + fs.StringVar(&cfg.ec.LogRotationConfigJSON, "log-rotation-config-json", embed.DefaultLogRotationConfig, "Configures log rotation if enabled with a JSON logger config. Default: MaxSize=100(MB), MaxAge=0(days,no limit), MaxBackups=0(no limit), LocalTime=false(UTC), Compress=false(gzip)") + + // version + fs.BoolVar(&cfg.printVersion, "version", false, "Print the version and exit.") + + fs.StringVar(&cfg.ec.AutoCompactionRetention, "auto-compaction-retention", "0", "Auto compaction retention for mvcc key value store. 0 means disable auto compaction.") + fs.StringVar(&cfg.ec.AutoCompactionMode, "auto-compaction-mode", "periodic", "interpret 'auto-compaction-retention' one of: periodic|revision. 
'periodic' for duration based retention, defaulting to hours if no time unit is provided (e.g. '5m'). 'revision' for revision number based retention.") + + // pprof profiler via HTTP + fs.BoolVar(&cfg.ec.EnablePprof, "enable-pprof", false, "Enable runtime profiling data via HTTP server. Address is at client URL + \"/debug/pprof/\"") + + // additional metrics + fs.StringVar(&cfg.ec.Metrics, "metrics", cfg.ec.Metrics, "Set level of detail for exported metrics, specify 'extensive' to include server side grpc histogram metrics") + + // experimental distributed tracing + fs.BoolVar(&cfg.ec.ExperimentalEnableDistributedTracing, "experimental-enable-distributed-tracing", false, "Enable experimental distributed tracing using OpenTelemetry Tracing.") + fs.StringVar(&cfg.ec.ExperimentalDistributedTracingAddress, "experimental-distributed-tracing-address", embed.ExperimentalDistributedTracingAddress, "Address for distributed tracing used for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag).") + fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceName, "experimental-distributed-tracing-service-name", embed.ExperimentalDistributedTracingServiceName, "Configures service name for distributed tracing to be used to define service name for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). 'etcd' is the default service name. Use the same service name for all instances of etcd.") + fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceInstanceID, "experimental-distributed-tracing-instance-id", "", "Configures service instance ID for distributed tracing to be used to define service instance ID key for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). There is no default value set. 
This ID must be unique per etcd instance.") + + // auth + fs.StringVar(&cfg.ec.AuthToken, "auth-token", cfg.ec.AuthToken, "Specify auth token specific options.") + fs.UintVar(&cfg.ec.BcryptCost, "bcrypt-cost", cfg.ec.BcryptCost, "Specify bcrypt algorithm cost factor for auth password hashing.") + fs.UintVar(&cfg.ec.AuthTokenTTL, "auth-token-ttl", cfg.ec.AuthTokenTTL, "The lifetime in seconds of the auth token.") + + // gateway + fs.BoolVar(&cfg.ec.EnableGRPCGateway, "enable-grpc-gateway", cfg.ec.EnableGRPCGateway, "Enable GRPC gateway.") + + // experimental + fs.BoolVar(&cfg.ec.ExperimentalInitialCorruptCheck, "experimental-initial-corrupt-check", cfg.ec.ExperimentalInitialCorruptCheck, "Enable to check data corruption before serving any client/peer traffic.") + fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, "Duration of time between cluster corruption check passes.") + + fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.") + // TODO: delete in v3.7 + fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpointPersist, "experimental-enable-lease-checkpoint-persist", false, "Enable persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled. 
Requires experimental-enable-lease-checkpoint to be enabled.") + fs.IntVar(&cfg.ec.ExperimentalCompactionBatchLimit, "experimental-compaction-batch-limit", cfg.ec.ExperimentalCompactionBatchLimit, "Sets the maximum revisions deleted in each compaction batch.") + fs.DurationVar(&cfg.ec.ExperimentalWatchProgressNotifyInterval, "experimental-watch-progress-notify-interval", cfg.ec.ExperimentalWatchProgressNotifyInterval, "Duration of periodic watch progress notifications.") + fs.DurationVar(&cfg.ec.ExperimentalDowngradeCheckTime, "experimental-downgrade-check-time", cfg.ec.ExperimentalDowngradeCheckTime, "Duration of time between two downgrade status check.") + fs.DurationVar(&cfg.ec.ExperimentalWarningApplyDuration, "experimental-warning-apply-duration", cfg.ec.ExperimentalWarningApplyDuration, "Time duration after which a warning is generated if request takes more time.") + fs.BoolVar(&cfg.ec.ExperimentalMemoryMlock, "experimental-memory-mlock", cfg.ec.ExperimentalMemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.") + fs.BoolVar(&cfg.ec.ExperimentalTxnModeWriteWithSharedBuffer, "experimental-txn-mode-write-with-shared-buffer", true, "Enable the write transaction to use a shared buffer in its readonly check operations.") + fs.UintVar(&cfg.ec.ExperimentalBootstrapDefragThresholdMegabytes, "experimental-bootstrap-defrag-threshold-megabytes", 0, "Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. 
Needs to be set to non-zero value to take effect.") + fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.") + + // unsafe + fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") + fs.BoolVar(&cfg.ec.ForceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster.") + + // ignored + for _, f := range cfg.ignored { + fs.Var(&flags.IgnoredFlag{Name: f}, f, "") + } + return cfg +} + +func (cfg *config) parse(arguments []string) error { + perr := cfg.cf.flagSet.Parse(arguments) + switch perr { + case nil: + case flag.ErrHelp: + fmt.Println(flagsline) + os.Exit(0) + default: + os.Exit(2) + } + if len(cfg.cf.flagSet.Args()) != 0 { + return fmt.Errorf("'%s' is not a valid flag", cfg.cf.flagSet.Arg(0)) + } + + if cfg.printVersion { + fmt.Printf("etcd Version: %s\n", version.Version) + fmt.Printf("Git SHA: %s\n", version.GitSHA) + fmt.Printf("Go Version: %s\n", runtime.Version()) + fmt.Printf("Go OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH) + os.Exit(0) + } + + var err error + + // This env variable must be parsed separately + // because we need to determine whether to use or + // ignore the env variables based on if the config file is set. 
+ if cfg.configFile == "" { + cfg.configFile = os.Getenv(flags.FlagToEnv("ETCD", "config-file")) + } + + if cfg.configFile != "" { + err = cfg.configFromFile(cfg.configFile) + if lg := cfg.ec.GetLogger(); lg != nil { + lg.Info( + "loaded server configuration, other configuration command line flags and environment variables will be ignored if provided", + zap.String("path", cfg.configFile), + ) + } + } else { + err = cfg.configFromCmdLine() + } + + if cfg.ec.V2Deprecation == "" { + cfg.ec.V2Deprecation = cconfig.V2_DEPR_DEFAULT + } + + // now logger is set up + return err +} + +func (cfg *config) configFromCmdLine() error { + // user-specified logger is not setup yet, use this logger during flag parsing + lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel) + if err != nil { + return err + } + verKey := "ETCD_VERSION" + if verVal := os.Getenv(verKey); verVal != "" { + // unset to avoid any possible side-effect. + os.Unsetenv(verKey) + + lg.Warn( + "cannot set special environment variable", + zap.String("key", verKey), + zap.String("value", verVal), + ) + } + + err = flags.SetFlagsFromEnv(lg, "ETCD", cfg.cf.flagSet) + if err != nil { + return err + } + + if rafthttp.ConnReadTimeout < rafthttp.DefaultConnReadTimeout { + rafthttp.ConnReadTimeout = rafthttp.DefaultConnReadTimeout + lg.Info(fmt.Sprintf("raft-read-timeout increased to minimum value: %v", rafthttp.DefaultConnReadTimeout)) + } + if rafthttp.ConnWriteTimeout < rafthttp.DefaultConnWriteTimeout { + rafthttp.ConnWriteTimeout = rafthttp.DefaultConnWriteTimeout + lg.Info(fmt.Sprintf("raft-write-timeout increased to minimum value: %v", rafthttp.DefaultConnWriteTimeout)) + } + + cfg.ec.LPUrls = flags.UniqueURLsFromFlag(cfg.cf.flagSet, "listen-peer-urls") + cfg.ec.APUrls = flags.UniqueURLsFromFlag(cfg.cf.flagSet, "initial-advertise-peer-urls") + cfg.ec.LCUrls = flags.UniqueURLsFromFlag(cfg.cf.flagSet, "listen-client-urls") + cfg.ec.ACUrls = flags.UniqueURLsFromFlag(cfg.cf.flagSet, "advertise-client-urls") + 
cfg.ec.ListenMetricsUrls = flags.UniqueURLsFromFlag(cfg.cf.flagSet, "listen-metrics-urls") + + cfg.ec.CORS = flags.UniqueURLsMapFromFlag(cfg.cf.flagSet, "cors") + cfg.ec.HostWhitelist = flags.UniqueStringsMapFromFlag(cfg.cf.flagSet, "host-whitelist") + + cfg.ec.CipherSuites = flags.StringsFromFlag(cfg.cf.flagSet, "cipher-suites") + + cfg.ec.LogOutputs = flags.UniqueStringsFromFlag(cfg.cf.flagSet, "log-outputs") + + cfg.ec.ClusterState = cfg.cf.clusterState.String() + cfg.cp.Fallback = cfg.cf.fallback.String() + cfg.cp.Proxy = cfg.cf.proxy.String() + + cfg.ec.V2Deprecation = cconfig.V2DeprecationEnum(cfg.cf.v2deprecation.String()) + + // disable default advertise-client-urls if lcurls is set + missingAC := flags.IsSet(cfg.cf.flagSet, "listen-client-urls") && !flags.IsSet(cfg.cf.flagSet, "advertise-client-urls") + if !cfg.mayBeProxy() && missingAC { + cfg.ec.ACUrls = nil + } + + // disable default initial-cluster if discovery is set + if (cfg.ec.Durl != "" || cfg.ec.DNSCluster != "" || cfg.ec.DNSClusterServiceName != "") && !flags.IsSet(cfg.cf.flagSet, "initial-cluster") { + cfg.ec.InitialCluster = "" + } + + return cfg.validate() +} + +func (cfg *config) configFromFile(path string) error { + eCfg, err := embed.ConfigFromFile(path) + if err != nil { + return err + } + cfg.ec = *eCfg + + // load extra config information + b, rerr := ioutil.ReadFile(path) + if rerr != nil { + return rerr + } + if yerr := yaml.Unmarshal(b, &cfg.cp); yerr != nil { + return yerr + } + + if cfg.cp.FallbackJSON != "" { + if err := cfg.cf.fallback.Set(cfg.cp.FallbackJSON); err != nil { + log.Fatalf("unexpected error setting up discovery-fallback flag: %v", err) + } + cfg.cp.Fallback = cfg.cf.fallback.String() + } + + if cfg.cp.ProxyJSON != "" { + if err := cfg.cf.proxy.Set(cfg.cp.ProxyJSON); err != nil { + log.Fatalf("unexpected error setting up proxyFlag: %v", err) + } + cfg.cp.Proxy = cfg.cf.proxy.String() + } + return nil +} + +func (cfg *config) mayBeProxy() bool { + mayFallbackToProxy 
:= cfg.ec.Durl != "" && cfg.cp.Fallback == fallbackFlagProxy + return cfg.cp.Proxy != proxyFlagOff || mayFallbackToProxy +} + +func (cfg *config) validate() error { + err := cfg.ec.Validate() + // TODO(yichengq): check this for joining through discovery service case + if err == embed.ErrUnsetAdvertiseClientURLsFlag && cfg.mayBeProxy() { + return nil + } + return err +} + +func (cfg config) isProxy() bool { return cfg.cf.proxy.String() != proxyFlagOff } +func (cfg config) isReadonlyProxy() bool { return cfg.cf.proxy.String() == proxyFlagReadonly } +func (cfg config) shouldFallbackToProxy() bool { return cfg.cf.fallback.String() == fallbackFlagProxy } diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/doc.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/doc.go new file mode 100644 index 0000000000..ff281aabc8 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/doc.go @@ -0,0 +1,16 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package etcdmain contains the main entry point for the etcd binary. 
+package etcdmain
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/etcd.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/etcd.go
new file mode 100644
index 0000000000..470eb83be9
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/etcd.go
@@ -0,0 +1,488 @@
+// Copyright 2015 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdmain
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"strings"
+	"time"
+
+	"go.etcd.io/etcd/client/pkg/v3/fileutil"
+	"go.etcd.io/etcd/client/pkg/v3/logutil"
+	"go.etcd.io/etcd/client/pkg/v3/transport"
+	"go.etcd.io/etcd/client/pkg/v3/types"
+	pkgioutil "go.etcd.io/etcd/pkg/v3/ioutil"
+	"go.etcd.io/etcd/pkg/v3/osutil"
+	"go.etcd.io/etcd/server/v3/embed"
+	"go.etcd.io/etcd/server/v3/etcdserver"
+	"go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp"
+	"go.etcd.io/etcd/server/v3/etcdserver/api/v2discovery"
+	"go.etcd.io/etcd/server/v3/proxy/httpproxy"
+
+	"go.uber.org/zap"
+	"google.golang.org/grpc"
+)
+
+type dirType string
+
+var (
+	dirMember = dirType("member")
+	dirProxy  = dirType("proxy")
+	dirEmpty  = dirType("empty")
+)
+
+func startEtcdOrProxyV2(args []string) { // args[0] is skipped: flags are parsed from args[1:]; never returns normally (ends in osutil.Exit or a fatal log)
+	grpc.EnableTracing = false
+
+	cfg := newConfig()
+	defaultInitialCluster := cfg.ec.InitialCluster
+
+	err := cfg.parse(args[1:])
+	lg := cfg.ec.GetLogger()
+	// If we failed to parse the whole configuration, print the error using
+	// preferably the resolved logger from the config,
+	// but if it does not exist, create a new temporary logger.
+	if lg == nil {
+		var zapError error
+		// use this logger
+		lg, zapError = logutil.CreateDefaultZapLogger(zap.InfoLevel)
+		if zapError != nil {
+			fmt.Printf("error creating zap logger %v", zapError)
+			os.Exit(1)
+		}
+	}
+	lg.Info("Running: ", zap.Strings("args", args))
+	if err != nil {
+		lg.Warn("failed to verify flags", zap.Error(err))
+		switch err {
+		case embed.ErrUnsetAdvertiseClientURLsFlag:
+			lg.Warn("advertise client URLs are not set", zap.Error(err))
+		}
+		os.Exit(1)
+	}
+
+	cfg.ec.SetupGlobalLoggers()
+
+	defer func() {
+		logger := cfg.ec.GetLogger()
+		if logger != nil {
+			logger.Sync()
+		}
+	}()
+
+	defaultHost, dhErr := (&cfg.ec).UpdateDefaultClusterFromName(defaultInitialCluster)
+	if defaultHost != "" {
+		lg.Info(
+			"detected default host for advertise",
+			zap.String("host", defaultHost),
+		)
+	}
+	if dhErr != nil {
+		lg.Info("failed to detect default host", zap.Error(dhErr))
+	}
+
+	if cfg.ec.Dir == "" {
+		cfg.ec.Dir = fmt.Sprintf("%v.etcd", cfg.ec.Name)
+		lg.Warn(
+			"'data-dir' was empty; using default",
+			zap.String("data-dir", cfg.ec.Dir),
+		)
+	}
+
+	var stopped <-chan struct{}
+	var errc <-chan error
+
+	which := identifyDataDirOrDie(cfg.ec.GetLogger(), cfg.ec.Dir)
+	if which != dirEmpty {
+		lg.Info(
+			"server has been already initialized",
+			zap.String("data-dir", cfg.ec.Dir),
+			zap.String("dir-type", string(which)),
+		)
+		switch which {
+		case dirMember:
+			stopped, errc, err = startEtcd(&cfg.ec)
+		case dirProxy:
+			err = startProxy(cfg)
+		default:
+			lg.Panic(
+				"unknown directory type",
+				zap.String("dir-type", string(which)),
+			)
+		}
+	} else {
+		shouldProxy := cfg.isProxy()
+		if !shouldProxy {
+			stopped, errc, err = startEtcd(&cfg.ec)
+			if derr, ok := err.(*etcdserver.DiscoveryError); ok && derr.Err == v2discovery.ErrFullCluster {
+				if cfg.shouldFallbackToProxy() {
+					lg.Warn(
+						"discovery cluster is full, falling back to proxy",
+						zap.String("fallback-proxy", fallbackFlagProxy),
+						zap.Error(err),
+					)
+					shouldProxy = true
+				}
+			} else if err != nil {
+				lg.Warn("failed to start etcd", zap.Error(err))
+			}
+		}
+		if shouldProxy {
+			err = startProxy(cfg)
+		}
+	}
+
+	if err != nil {
+		if derr, ok := err.(*etcdserver.DiscoveryError); ok {
+			switch derr.Err {
+			case v2discovery.ErrDuplicateID:
+				lg.Warn(
+					"member has been registered with discovery service",
+					zap.String("name", cfg.ec.Name),
+					zap.String("discovery-token", cfg.ec.Durl),
+					zap.Error(derr.Err),
+				)
+				lg.Warn(
+					"but could not find valid cluster configuration",
+					zap.String("data-dir", cfg.ec.Dir),
+				)
+				lg.Warn("check data dir if previous bootstrap succeeded")
+				lg.Warn("or use a new discovery token if previous bootstrap failed")
+
+			case v2discovery.ErrDuplicateName:
+				lg.Warn(
+					"member with duplicated name has already been registered",
+					zap.String("discovery-token", cfg.ec.Durl),
+					zap.Error(derr.Err),
+				)
+				lg.Warn("cURL the discovery token URL for details")
+				lg.Warn("do not reuse discovery token; generate a new one to bootstrap a cluster")
+
+			default:
+				lg.Warn(
+					"failed to bootstrap; discovery token was already used",
+					zap.String("discovery-token", cfg.ec.Durl),
+					zap.Error(err),
+				)
+				lg.Warn("do not reuse discovery token; generate a new one to bootstrap a cluster")
+			}
+			os.Exit(1)
+		}
+
+		if strings.Contains(err.Error(), "include") && strings.Contains(err.Error(), "--initial-cluster") {
+			lg.Warn("failed to start", zap.Error(err))
+			if cfg.ec.InitialCluster == cfg.ec.InitialClusterFromName(cfg.ec.Name) {
+				lg.Warn("forgot to set --initial-cluster?")
+			}
+			if types.URLs(cfg.ec.APUrls).String() == embed.DefaultInitialAdvertisePeerURLs {
+				lg.Warn("forgot to set --initial-advertise-peer-urls?")
+			}
+			if cfg.ec.InitialCluster == cfg.ec.InitialClusterFromName(cfg.ec.Name) && len(cfg.ec.Durl) == 0 {
+				lg.Warn("--discovery flag is not set")
+			}
+			os.Exit(1)
+		}
+		lg.Fatal("discovery failed", zap.Error(err)) // NOTE(review): DiscoveryError values already exited above, so this fires for other start-up errors too
+	}
+
+	osutil.HandleInterrupts(lg)
+
+	// At this point, the initialization of etcd is done.
+	// The listeners are listening on the TCP ports and ready
+	// for accepting connections. The etcd instance should be
+	// joined with the cluster and ready to serve incoming
+	// connections.
+	notifySystemd(lg)
+
+	select {
+	case lerr := <-errc:
+		// fatal out on listener errors
+		lg.Fatal("listener failed", zap.Error(lerr))
+	case <-stopped:
+	}
+
+	osutil.Exit(0)
+}
+
+// startEtcd runs StartEtcd in addition to hooks needed for standalone etcd.
+func startEtcd(cfg *embed.Config) (<-chan struct{}, <-chan error, error) {
+	e, err := embed.StartEtcd(cfg)
+	if err != nil {
+		return nil, nil, err
+	}
+	osutil.RegisterInterruptHandler(e.Close)
+	select {
+	case <-e.Server.ReadyNotify(): // wait for e.Server to join the cluster
+	case <-e.Server.StopNotify(): // publish aborted from 'ErrStopped'
+	}
+	return e.Server.StopNotify(), e.Err(), nil
+}
+
+// startProxy launches an HTTP proxy for client communication which proxies to other etcd nodes.
+func startProxy(cfg *config) error {
+	lg := cfg.ec.GetLogger()
+	lg.Info("v2 API proxy starting")
+
+	clientTLSInfo := cfg.ec.ClientTLSInfo
+	if clientTLSInfo.Empty() {
+		// Support old proxy behavior of defaulting to PeerTLSInfo
+		// for both client and peer connections.
+		clientTLSInfo = cfg.ec.PeerTLSInfo
+	}
+	clientTLSInfo.InsecureSkipVerify = cfg.ec.ClientAutoTLS
+	cfg.ec.PeerTLSInfo.InsecureSkipVerify = cfg.ec.PeerAutoTLS
+
+	pt, err := transport.NewTimeoutTransport(
+		clientTLSInfo,
+		time.Duration(cfg.cp.ProxyDialTimeoutMs)*time.Millisecond,
+		time.Duration(cfg.cp.ProxyReadTimeoutMs)*time.Millisecond,
+		time.Duration(cfg.cp.ProxyWriteTimeoutMs)*time.Millisecond,
+	)
+	if err != nil {
+		return err
+	}
+	pt.MaxIdleConnsPerHost = httpproxy.DefaultMaxIdleConnsPerHost
+
+	if err = cfg.ec.PeerSelfCert(); err != nil {
+		lg.Fatal("failed to get self-signed certs for peer", zap.Error(err))
+	}
+	tr, err := transport.NewTimeoutTransport(
+		cfg.ec.PeerTLSInfo,
+		time.Duration(cfg.cp.ProxyDialTimeoutMs)*time.Millisecond,
+		time.Duration(cfg.cp.ProxyReadTimeoutMs)*time.Millisecond,
+		time.Duration(cfg.cp.ProxyWriteTimeoutMs)*time.Millisecond,
+	)
+	if err != nil {
+		return err
+	}
+
+	cfg.ec.Dir = filepath.Join(cfg.ec.Dir, "proxy")
+	err = fileutil.TouchDirAll(cfg.ec.Dir)
+	if err != nil {
+		return err
+	}
+
+	var peerURLs []string
+	clusterfile := filepath.Join(cfg.ec.Dir, "cluster")
+
+	b, err := ioutil.ReadFile(clusterfile)
+	switch {
+	case err == nil:
+		if cfg.ec.Durl != "" {
+			lg.Warn(
+				"discovery token ignored since the proxy has already been initialized; valid cluster file found",
+				zap.String("cluster-file", clusterfile),
+			)
+		}
+		if cfg.ec.DNSCluster != "" {
+			lg.Warn(
+				"DNS SRV discovery ignored since the proxy has already been initialized; valid cluster file found",
+				zap.String("cluster-file", clusterfile),
+			)
+		}
+		urls := struct{ PeerURLs []string }{}
+		err = json.Unmarshal(b, &urls)
+		if err != nil {
+			return err
+		}
+		peerURLs = urls.PeerURLs
+		lg.Info(
+			"proxy using peer URLS from cluster file",
+			zap.Strings("peer-urls", peerURLs),
+			zap.String("cluster-file", clusterfile),
+		)
+
+	case os.IsNotExist(err):
+		var urlsmap types.URLsMap
+		urlsmap, _, err = cfg.ec.PeerURLsMapAndToken("proxy")
+		if err != nil {
+			return fmt.Errorf("error setting up initial cluster: %v", err)
+		}
+
+		if cfg.ec.Durl != "" {
+			var s string
+			s, err = v2discovery.GetCluster(lg, cfg.ec.Durl, cfg.ec.Dproxy)
+			if err != nil {
+				return err
+			}
+			if urlsmap, err = types.NewURLsMap(s); err != nil {
+				return err
+			}
+		}
+		peerURLs = urlsmap.URLs()
+		lg.Info("proxy using peer URLS", zap.Strings("peer-urls", peerURLs))
+
+	default:
+		return err
+	}
+
+	clientURLs := []string{}
+	uf := func() []string { // refresh callback handed to httpproxy.NewHandler; returns the client URLs to proxy to
+		gcls, gerr := etcdserver.GetClusterFromRemotePeers(lg, peerURLs, tr)
+		if gerr != nil {
+			lg.Warn(
+				"failed to get cluster from remote peers",
+				zap.Strings("peer-urls", peerURLs),
+				zap.Error(gerr),
+			)
+			return []string{}
+		}
+
+		clientURLs = gcls.ClientURLs()
+		urls := struct{ PeerURLs []string }{gcls.PeerURLs()}
+		b, jerr := json.Marshal(urls)
+		if jerr != nil {
+			lg.Warn("proxy failed to marshal peer URLs", zap.Error(jerr))
+			return clientURLs
+		}
+		// Keep errors closure-local (like gerr/jerr) so a refresh never writes to startProxy's own err.
+		werr := pkgioutil.WriteAndSyncFile(clusterfile+".bak", b, 0600)
+		if werr != nil {
+			lg.Warn("proxy failed to write cluster file", zap.Error(werr))
+			return clientURLs
+		}
+		rerr := os.Rename(clusterfile+".bak", clusterfile)
+		if rerr != nil {
+			lg.Warn(
+				"proxy failed to rename cluster file",
+				zap.String("path", clusterfile),
+				zap.Error(rerr),
+			)
+			return clientURLs
+		}
+		if !reflect.DeepEqual(gcls.PeerURLs(), peerURLs) {
+			lg.Info(
+				"proxy updated peer URLs",
+				zap.Strings("from", peerURLs),
+				zap.Strings("to", gcls.PeerURLs()),
+			)
+		}
+		peerURLs = gcls.PeerURLs()
+
+		return clientURLs
+	}
+	ph := httpproxy.NewHandler(lg, pt, uf, time.Duration(cfg.cp.ProxyFailureWaitMs)*time.Millisecond, time.Duration(cfg.cp.ProxyRefreshIntervalMs)*time.Millisecond)
+	ph = embed.WrapCORS(cfg.ec.CORS, ph)
+
+	if cfg.isReadonlyProxy() {
+		ph = httpproxy.NewReadonlyHandler(ph)
+	}
+
+	// setup self signed certs when serving https
+	cHosts, cTLS := []string{}, false
+	for _, u := range cfg.ec.LCUrls {
+		cHosts = append(cHosts, u.Host)
+		cTLS = cTLS || u.Scheme == "https"
+	}
+	for _, u := range cfg.ec.ACUrls {
+		cHosts = append(cHosts, u.Host)
+		cTLS = cTLS || u.Scheme == "https"
+	}
+	listenerTLS := cfg.ec.ClientTLSInfo
+	if cfg.ec.ClientAutoTLS && cTLS {
+		listenerTLS, err = transport.SelfCert(cfg.ec.GetLogger(), filepath.Join(cfg.ec.Dir, "clientCerts"), cHosts, cfg.ec.SelfSignedCertValidity)
+		if err != nil {
+			lg.Fatal("failed to initialize self-signed client cert", zap.Error(err))
+		}
+	}
+
+	// Start a proxy server goroutine for each listen address
+	for _, u := range cfg.ec.LCUrls {
+		l, err := transport.NewListener(u.Host, u.Scheme, &listenerTLS)
+		if err != nil {
+			return err
+		}
+
+		host := u.String()
+		go func() {
+			lg.Info("v2 proxy started listening on client requests", zap.String("host", host))
+			mux := http.NewServeMux()
+			etcdhttp.HandlePrometheus(mux) // v2 proxy just uses the same port
+			mux.Handle("/", ph)
+			lg.Fatal("done serving", zap.Error(http.Serve(l, mux)))
+		}()
+	}
+	return nil
+}
+
+// identifyDataDirOrDie returns the type of the data dir.
+// Dies if the datadir is invalid.
+func identifyDataDirOrDie(lg *zap.Logger, dir string) dirType {
+	names, err := fileutil.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return dirEmpty // a data dir that does not exist yet is reported as empty
+		}
+		lg.Fatal("failed to list data directory", zap.String("dir", dir), zap.Error(err))
+	}
+
+	var m, p bool // m: a "member" entry was seen; p: a "proxy" entry was seen
+	for _, name := range names {
+		switch dirType(name) {
+		case dirMember:
+			m = true
+		case dirProxy:
+			p = true
+		default:
+			lg.Warn(
+				"found invalid file under data directory",
+				zap.String("filename", name),
+				zap.String("data-dir", dir),
+			)
+		}
+	}
+
+	if m && p {
+		lg.Fatal("invalid datadir; both member and proxy directories exist")
+	}
+	if m {
+		return dirMember
+	}
+	if p {
+		return dirProxy
+	}
+	return dirEmpty
+}
+
+func checkSupportArch() { // returns on a supported GOARCH, or when ETCD_UNSUPPORTED_ARCH equals the current GOARCH; otherwise exits with status 1
+	lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel)
+	if err != nil {
+		panic(err)
+	}
+	// to add a new platform, check https://github.com/etcd-io/website/blob/main/content/en/docs/next/op-guide/supported-platform.md
+	if runtime.GOARCH == "amd64" ||
+		runtime.GOARCH == "arm64" ||
+		runtime.GOARCH == "ppc64le" ||
+		runtime.GOARCH == "s390x" {
+		return
+	}
+	// unsupported arch only configured via environment variable
+	// so unset here to not parse through flag
+	defer os.Unsetenv("ETCD_UNSUPPORTED_ARCH")
+	if env, ok := os.LookupEnv("ETCD_UNSUPPORTED_ARCH"); ok && env == runtime.GOARCH {
+		lg.Info("running etcd on unsupported architecture since ETCD_UNSUPPORTED_ARCH is set", zap.String("arch", env))
+		return
+	}
+	// The override is absent or names a different architecture: refuse to start and say so.
+	lg.Error("refusing to run etcd on unsupported architecture since ETCD_UNSUPPORTED_ARCH is not set to this architecture", zap.String("arch", runtime.GOARCH))
+	os.Exit(1) // os.Exit skips deferred calls, so the Unsetenv above only runs on the return path
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/gateway.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/gateway.go
new file mode 100644
index 0000000000..64fb90df2c
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/gateway.go
@@ -0,0 +1,183 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use
this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdmain
+
+import (
+	"fmt"
+	"net"
+	"net/url"
+	"os"
+	"time"
+
+	"go.etcd.io/etcd/client/pkg/v3/logutil"
+	"go.etcd.io/etcd/server/v3/proxy/tcpproxy"
+
+	"github.com/spf13/cobra"
+	"go.uber.org/zap"
+)
+
+var (
+	gatewayListenAddr            string        // bound to --listen-addr by newGatewayStartCommand
+	gatewayEndpoints             []string      // bound to --endpoints
+	gatewayDNSCluster            string        // bound to --discovery-srv
+	gatewayDNSClusterServiceName string        // bound to --discovery-srv-name
+	gatewayInsecureDiscovery     bool          // bound to --insecure-discovery
+	gatewayRetryDelay            time.Duration // bound to --retry-delay
+	gatewayCA                    string        // bound to --trusted-ca-file
+)
+
+var (
+	rootCmd = &cobra.Command{ // parent command that the package's init functions attach subcommands to
+		Use:        "etcd",
+		Short:      "etcd server",
+		SuggestFor: []string{"etcd"},
+	}
+)
+
+func init() {
+	rootCmd.AddCommand(newGatewayCommand()) // registers the "gateway" subcommand tree
+}
+
+// newGatewayCommand returns the cobra command for "gateway".
+func newGatewayCommand() *cobra.Command {
+	lpc := &cobra.Command{
+		Use:   "gateway ",
+		Short: "gateway related command",
+	}
+	lpc.AddCommand(newGatewayStartCommand())
+
+	return lpc
+}
+
+func newGatewayStartCommand() *cobra.Command { // builds "gateway start" and binds its flags to the package-level gateway* variables
+	cmd := cobra.Command{
+		Use:   "start",
+		Short: "start the gateway",
+		Run:   startGateway,
+	}
+
+	cmd.Flags().StringVar(&gatewayListenAddr, "listen-addr", "127.0.0.1:23790", "listen address")
+	cmd.Flags().StringVar(&gatewayDNSCluster, "discovery-srv", "", "DNS domain used to bootstrap initial cluster")
+	cmd.Flags().StringVar(&gatewayDNSClusterServiceName, "discovery-srv-name", "", "service name to query when using DNS discovery")
+	cmd.Flags().BoolVar(&gatewayInsecureDiscovery, "insecure-discovery", false, "accept insecure SRV records")
+	cmd.Flags().StringVar(&gatewayCA, "trusted-ca-file", "", "path to the client server TLS CA file for verifying the discovered endpoints when discovery-srv is provided.")
+
+	cmd.Flags().StringSliceVar(&gatewayEndpoints, "endpoints", []string{"127.0.0.1:2379"}, "comma separated etcd cluster endpoints")
+
+	cmd.Flags().DurationVar(&gatewayRetryDelay, "retry-delay", time.Minute, "duration of delay before retrying failed endpoints")
+
+	return &cmd
+}
+
+func stripSchema(eps []string) []string { // entries that parse as a URL with a non-empty host are reduced to that host; all others pass through unchanged
+	var endpoints []string
+	for _, ep := range eps {
+		if u, err := url.Parse(ep); err == nil && u.Host != "" {
+			ep = u.Host
+		}
+		endpoints = append(endpoints, ep)
+	}
+	return endpoints
+}
+
+func startGateway(cmd *cobra.Command, args []string) { // cobra Run handler for "gateway start"; exits the process with status 1 on any setup error
+	lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	// We use os.Args to show all the arguments (not only passed-through Cobra).
+	lg.Info("Running: ", zap.Strings("args", os.Args))
+
+	srvs := discoverEndpoints(lg, gatewayDNSCluster, gatewayCA, gatewayInsecureDiscovery, gatewayDNSClusterServiceName)
+	if len(srvs.Endpoints) == 0 {
+		// no endpoints discovered, fall back to provided endpoints
+		srvs.Endpoints = gatewayEndpoints
+	}
+	// Strip the schema from the endpoints because we start just a TCP proxy
+	srvs.Endpoints = stripSchema(srvs.Endpoints)
+	if len(srvs.SRVs) == 0 {
+		for _, ep := range srvs.Endpoints {
+			h, p, serr := net.SplitHostPort(ep)
+			var port uint16
+			// A non-numeric or out-of-range port is rejected here instead of silently becoming port 0.
+			if _, perr := fmt.Sscanf(p, "%d", &port); serr != nil || perr != nil {
+				fmt.Printf("error parsing endpoint %q\n", ep)
+				os.Exit(1)
+			}
+			srvs.SRVs = append(srvs.SRVs, &net.SRV{Target: h, Port: port})
+		}
+	}
+
+	lhost, lport, err := net.SplitHostPort(gatewayListenAddr)
+	if err != nil {
+		fmt.Println("failed to validate listen address:", gatewayListenAddr)
+		os.Exit(1)
+	}
+
+	laddrs, err := net.LookupHost(lhost)
+	if err != nil {
+		fmt.Println("failed to resolve listen host:", lhost)
+		os.Exit(1)
+	}
+	laddrsMap := make(map[string]bool)
+	for _, addr := range laddrs {
+		laddrsMap[addr] = true
+	}
+
+	for _, srv := range srvs.SRVs { // refuse any endpoint that resolves to the gateway's own listen address and port
+		var eaddrs []string
+		eaddrs, err = net.LookupHost(srv.Target)
+		if err != nil {
+			fmt.Println("failed to resolve endpoint host:", srv.Target)
+			os.Exit(1)
+		}
+		if fmt.Sprintf("%d", srv.Port) != lport {
+			continue
+		}
+
+		for _, ea := range eaddrs {
+			if laddrsMap[ea] {
+				fmt.Printf("SRV or endpoint (%s:%d->%s:%d) should not resolve to the gateway listen addr (%s)\n", srv.Target, srv.Port, ea, srv.Port, gatewayListenAddr)
+				os.Exit(1)
+			}
+		}
+	}
+
+	if len(srvs.Endpoints) == 0 {
+		fmt.Println("no endpoints found")
+		os.Exit(1)
+	}
+
+	var l net.Listener
+	l, err = net.Listen("tcp", gatewayListenAddr)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	tp := tcpproxy.TCPProxy{
+		Logger:          lg,
+		Listener:        l,
+		Endpoints:       srvs.SRVs,
+		MonitorInterval: gatewayRetryDelay,
+	}
+
+	// At this point, etcd gateway listener is initialized
+	notifySystemd(lg)
+
+	tp.Run()
+}
diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/grpc_proxy.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/grpc_proxy.go
new file mode 100644
index 0000000000..f832ca5afa
--- /dev/null
+++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/grpc_proxy.go
@@ -0,0 +1,535 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package etcdmain
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math"
+	"net"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"time"
+
+	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
+	"go.etcd.io/etcd/client/pkg/v3/logutil"
+	"go.etcd.io/etcd/client/pkg/v3/transport"
+	clientv3 "go.etcd.io/etcd/client/v3"
+	"go.etcd.io/etcd/client/v3/leasing"
+	"go.etcd.io/etcd/client/v3/namespace"
+	"go.etcd.io/etcd/client/v3/ordering"
+	"go.etcd.io/etcd/pkg/v3/debugutil"
+	"go.etcd.io/etcd/server/v3/embed"
+	"go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb"
+	"go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb"
+	"go.etcd.io/etcd/server/v3/proxy/grpcproxy"
+	"go.uber.org/zap/zapgrpc"
+
+	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
+	"github.com/soheilhy/cmux"
+	"github.com/spf13/cobra"
+	"go.uber.org/zap"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/grpclog"
+	"google.golang.org/grpc/keepalive"
+)
+
+var (
+	grpcProxyListenAddr            string
+	grpcProxyMetricsListenAddr     string
+	grpcProxyEndpoints             []string
+	grpcProxyDNSCluster            string
+	grpcProxyDNSClusterServiceName string
+	grpcProxyInsecureDiscovery     bool
+	grpcProxyDataDir               string
+	grpcMaxCallSendMsgSize         int
+	grpcMaxCallRecvMsgSize         int
+
+	// tls for connecting to etcd
+
+	grpcProxyCA                    string
+	grpcProxyCert                  string
+	grpcProxyKey                   string
+	grpcProxyInsecureSkipTLSVerify bool
+
+	// tls for clients connecting to proxy
+
+	grpcProxyListenCA      string
+	grpcProxyListenCert    string
+	grpcProxyListenKey     string
+	grpcProxyListenAutoTLS bool
+	grpcProxyListenCRL     string
+	selfSignedCertValidity uint
+
+	grpcProxyAdvertiseClientURL string
+	grpcProxyResolverPrefix     string
+	grpcProxyResolverTTL        int
+
+	grpcProxyNamespace string
+	grpcProxyLeasing   string
+
+	grpcProxyEnablePprof    bool
+	grpcProxyEnableOrdering bool
+
+	grpcProxyDebug bool
+
+	// GRPC keep alive related options.
+	grpcKeepAliveMinTime  time.Duration
+	grpcKeepAliveTimeout  time.Duration
+	grpcKeepAliveInterval time.Duration
+)
+
+const defaultGRPCMaxCallSendMsgSize = 1.5 * 1024 * 1024
+
+func init() {
+	rootCmd.AddCommand(newGRPCProxyCommand())
+}
+
+// newGRPCProxyCommand returns the cobra command for "grpc-proxy".
+func newGRPCProxyCommand() *cobra.Command { + lpc := &cobra.Command{ + Use: "grpc-proxy ", + Short: "grpc-proxy related command", + } + lpc.AddCommand(newGRPCProxyStartCommand()) + + return lpc +} + +func newGRPCProxyStartCommand() *cobra.Command { + cmd := cobra.Command{ + Use: "start", + Short: "start the grpc proxy", + Run: startGRPCProxy, + } + + cmd.Flags().StringVar(&grpcProxyListenAddr, "listen-addr", "127.0.0.1:23790", "listen address") + cmd.Flags().StringVar(&grpcProxyDNSCluster, "discovery-srv", "", "domain name to query for SRV records describing cluster endpoints") + cmd.Flags().StringVar(&grpcProxyDNSClusterServiceName, "discovery-srv-name", "", "service name to query when using DNS discovery") + cmd.Flags().StringVar(&grpcProxyMetricsListenAddr, "metrics-addr", "", "listen for endpoint /metrics requests on an additional interface") + cmd.Flags().BoolVar(&grpcProxyInsecureDiscovery, "insecure-discovery", false, "accept insecure SRV records") + cmd.Flags().StringSliceVar(&grpcProxyEndpoints, "endpoints", []string{"127.0.0.1:2379"}, "comma separated etcd cluster endpoints") + cmd.Flags().StringVar(&grpcProxyAdvertiseClientURL, "advertise-client-url", "127.0.0.1:23790", "advertise address to register (must be reachable by client)") + cmd.Flags().StringVar(&grpcProxyResolverPrefix, "resolver-prefix", "", "prefix to use for registering proxy (must be shared with other grpc-proxy members)") + cmd.Flags().IntVar(&grpcProxyResolverTTL, "resolver-ttl", 0, "specify TTL, in seconds, when registering proxy endpoints") + cmd.Flags().StringVar(&grpcProxyNamespace, "namespace", "", "string to prefix to all keys for namespacing requests") + cmd.Flags().BoolVar(&grpcProxyEnablePprof, "enable-pprof", false, `Enable runtime profiling data via HTTP server. 
Address is at client URL + "/debug/pprof/"`) + cmd.Flags().StringVar(&grpcProxyDataDir, "data-dir", "default.proxy", "Data directory for persistent data") + cmd.Flags().IntVar(&grpcMaxCallSendMsgSize, "max-send-bytes", defaultGRPCMaxCallSendMsgSize, "message send limits in bytes (default value is 1.5 MiB)") + cmd.Flags().IntVar(&grpcMaxCallRecvMsgSize, "max-recv-bytes", math.MaxInt32, "message receive limits in bytes (default value is math.MaxInt32)") + cmd.Flags().DurationVar(&grpcKeepAliveMinTime, "grpc-keepalive-min-time", embed.DefaultGRPCKeepAliveMinTime, "Minimum interval duration that a client should wait before pinging proxy.") + cmd.Flags().DurationVar(&grpcKeepAliveInterval, "grpc-keepalive-interval", embed.DefaultGRPCKeepAliveInterval, "Frequency duration of server-to-client ping to check if a connection is alive (0 to disable).") + cmd.Flags().DurationVar(&grpcKeepAliveTimeout, "grpc-keepalive-timeout", embed.DefaultGRPCKeepAliveTimeout, "Additional duration of wait before closing a non-responsive connection (0 to disable).") + + // client TLS for connecting to server + cmd.Flags().StringVar(&grpcProxyCert, "cert", "", "identify secure connections with etcd servers using this TLS certificate file") + cmd.Flags().StringVar(&grpcProxyKey, "key", "", "identify secure connections with etcd servers using this TLS key file") + cmd.Flags().StringVar(&grpcProxyCA, "cacert", "", "verify certificates of TLS-enabled secure etcd servers using this CA bundle") + cmd.Flags().BoolVar(&grpcProxyInsecureSkipTLSVerify, "insecure-skip-tls-verify", false, "skip authentication of etcd server TLS certificates (CAUTION: this option should be enabled only for testing purposes)") + + // client TLS for connecting to proxy + cmd.Flags().StringVar(&grpcProxyListenCert, "cert-file", "", "identify secure connections to the proxy using this TLS certificate file") + cmd.Flags().StringVar(&grpcProxyListenKey, "key-file", "", "identify secure connections to the proxy using this TLS key 
file") + cmd.Flags().StringVar(&grpcProxyListenCA, "trusted-ca-file", "", "verify certificates of TLS-enabled secure proxy using this CA bundle") + cmd.Flags().BoolVar(&grpcProxyListenAutoTLS, "auto-tls", false, "proxy TLS using generated certificates") + cmd.Flags().StringVar(&grpcProxyListenCRL, "client-crl-file", "", "proxy client certificate revocation list file.") + cmd.Flags().UintVar(&selfSignedCertValidity, "self-signed-cert-validity", 1, "The validity period of the proxy certificates, unit is year") + + // experimental flags + cmd.Flags().BoolVar(&grpcProxyEnableOrdering, "experimental-serializable-ordering", false, "Ensure serializable reads have monotonically increasing store revisions across endpoints.") + cmd.Flags().StringVar(&grpcProxyLeasing, "experimental-leasing-prefix", "", "leasing metadata prefix for disconnected linearized reads.") + + cmd.Flags().BoolVar(&grpcProxyDebug, "debug", false, "Enable debug-level logging for grpc-proxy.") + + return &cmd +} + +func startGRPCProxy(cmd *cobra.Command, args []string) { + checkArgs() + lvl := zap.InfoLevel + if grpcProxyDebug { + lvl = zap.DebugLevel + grpc.EnableTracing = true + } + lg, err := logutil.CreateDefaultZapLogger(lvl) + if err != nil { + panic(err) + } + defer lg.Sync() + + grpclog.SetLoggerV2(zapgrpc.NewLogger(lg)) + + // The proxy itself (ListenCert) can have not-empty CN. + // The empty CN is required for grpcProxyCert. + // Please see https://github.com/etcd-io/etcd/issues/11970#issuecomment-687875315 for more context. 
+ tlsinfo := newTLS(grpcProxyListenCA, grpcProxyListenCert, grpcProxyListenKey, false) + + if tlsinfo == nil && grpcProxyListenAutoTLS { + host := []string{"https://" + grpcProxyListenAddr} + dir := filepath.Join(grpcProxyDataDir, "fixtures", "proxy") + autoTLS, err := transport.SelfCert(lg, dir, host, selfSignedCertValidity) + if err != nil { + log.Fatal(err) + } + tlsinfo = &autoTLS + } + if tlsinfo != nil { + lg.Info("gRPC proxy server TLS", zap.String("tls-info", fmt.Sprintf("%+v", tlsinfo))) + } + m := mustListenCMux(lg, tlsinfo) + grpcl := m.Match(cmux.HTTP2()) + defer func() { + grpcl.Close() + lg.Info("stop listening gRPC proxy client requests", zap.String("address", grpcProxyListenAddr)) + }() + + client := mustNewClient(lg) + + // The proxy client is used for self-healthchecking. + // TODO: The mechanism should be refactored to use internal connection. + var proxyClient *clientv3.Client + if grpcProxyAdvertiseClientURL != "" { + proxyClient = mustNewProxyClient(lg, tlsinfo) + } + httpClient := mustNewHTTPClient(lg) + + srvhttp, httpl := mustHTTPListener(lg, m, tlsinfo, client, proxyClient) + errc := make(chan error, 3) + go func() { errc <- newGRPCProxyServer(lg, client).Serve(grpcl) }() + go func() { errc <- srvhttp.Serve(httpl) }() + go func() { errc <- m.Serve() }() + if len(grpcProxyMetricsListenAddr) > 0 { + mhttpl := mustMetricsListener(lg, tlsinfo) + go func() { + mux := http.NewServeMux() + grpcproxy.HandleMetrics(mux, httpClient, client.Endpoints()) + grpcproxy.HandleHealth(lg, mux, client) + grpcproxy.HandleProxyMetrics(mux) + grpcproxy.HandleProxyHealth(lg, mux, proxyClient) + lg.Info("gRPC proxy server metrics URL serving") + herr := http.Serve(mhttpl, mux) + if herr != nil { + lg.Fatal("gRPC proxy server metrics URL returned", zap.Error(herr)) + } else { + lg.Info("gRPC proxy server metrics URL returned") + } + }() + } + + lg.Info("started gRPC proxy", zap.String("address", grpcProxyListenAddr)) + + // grpc-proxy is initialized, ready to 
serve + notifySystemd(lg) + + fmt.Fprintln(os.Stderr, <-errc) + os.Exit(1) +} + +func checkArgs() { + if grpcProxyResolverPrefix != "" && grpcProxyResolverTTL < 1 { + fmt.Fprintln(os.Stderr, fmt.Errorf("invalid resolver-ttl %d", grpcProxyResolverTTL)) + os.Exit(1) + } + if grpcProxyResolverPrefix == "" && grpcProxyResolverTTL > 0 { + fmt.Fprintln(os.Stderr, fmt.Errorf("invalid resolver-prefix %q", grpcProxyResolverPrefix)) + os.Exit(1) + } + if grpcProxyResolverPrefix != "" && grpcProxyResolverTTL > 0 && grpcProxyAdvertiseClientURL == "" { + fmt.Fprintln(os.Stderr, fmt.Errorf("invalid advertise-client-url %q", grpcProxyAdvertiseClientURL)) + os.Exit(1) + } + if grpcProxyListenAutoTLS && selfSignedCertValidity == 0 { + fmt.Fprintln(os.Stderr, fmt.Errorf("selfSignedCertValidity is invalid,it should be greater than 0")) + os.Exit(1) + } +} + +func mustNewClient(lg *zap.Logger) *clientv3.Client { + srvs := discoverEndpoints(lg, grpcProxyDNSCluster, grpcProxyCA, grpcProxyInsecureDiscovery, grpcProxyDNSClusterServiceName) + eps := srvs.Endpoints + if len(eps) == 0 { + eps = grpcProxyEndpoints + } + cfg, err := newClientCfg(lg, eps) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + cfg.DialOptions = append(cfg.DialOptions, + grpc.WithUnaryInterceptor(grpcproxy.AuthUnaryClientInterceptor)) + cfg.DialOptions = append(cfg.DialOptions, + grpc.WithStreamInterceptor(grpcproxy.AuthStreamClientInterceptor)) + cfg.Logger = lg.Named("client") + client, err := clientv3.New(*cfg) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + return client +} + +func mustNewProxyClient(lg *zap.Logger, tls *transport.TLSInfo) *clientv3.Client { + eps := []string{grpcProxyAdvertiseClientURL} + cfg, err := newProxyClientCfg(lg.Named("client"), eps, tls) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + client, err := clientv3.New(*cfg) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + lg.Info("create proxy client", 
zap.String("grpcProxyAdvertiseClientURL", grpcProxyAdvertiseClientURL)) + return client +} + +func newProxyClientCfg(lg *zap.Logger, eps []string, tls *transport.TLSInfo) (*clientv3.Config, error) { + cfg := clientv3.Config{ + Endpoints: eps, + DialTimeout: 5 * time.Second, + Logger: lg, + } + if tls != nil { + clientTLS, err := tls.ClientConfig() + if err != nil { + return nil, err + } + cfg.TLS = clientTLS + } + return &cfg, nil +} + +func newClientCfg(lg *zap.Logger, eps []string) (*clientv3.Config, error) { + // set tls if any one tls option set + cfg := clientv3.Config{ + Endpoints: eps, + DialTimeout: 5 * time.Second, + } + + if grpcMaxCallSendMsgSize > 0 { + cfg.MaxCallSendMsgSize = grpcMaxCallSendMsgSize + } + if grpcMaxCallRecvMsgSize > 0 { + cfg.MaxCallRecvMsgSize = grpcMaxCallRecvMsgSize + } + + tls := newTLS(grpcProxyCA, grpcProxyCert, grpcProxyKey, true) + if tls == nil && grpcProxyInsecureSkipTLSVerify { + tls = &transport.TLSInfo{} + } + if tls != nil { + clientTLS, err := tls.ClientConfig() + if err != nil { + return nil, err + } + clientTLS.InsecureSkipVerify = grpcProxyInsecureSkipTLSVerify + if clientTLS.InsecureSkipVerify { + lg.Warn("--insecure-skip-tls-verify was given, this grpc proxy process skips authentication of etcd server TLS certificates. 
This option should be enabled only for testing purposes.") + } + cfg.TLS = clientTLS + lg.Info("gRPC proxy client TLS", zap.String("tls-info", fmt.Sprintf("%+v", tls))) + } + return &cfg, nil +} + +func newTLS(ca, cert, key string, requireEmptyCN bool) *transport.TLSInfo { + if ca == "" && cert == "" && key == "" { + return nil + } + return &transport.TLSInfo{TrustedCAFile: ca, CertFile: cert, KeyFile: key} +} + +func mustListenCMux(lg *zap.Logger, tlsinfo *transport.TLSInfo) cmux.CMux { + l, err := net.Listen("tcp", grpcProxyListenAddr) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + if l, err = transport.NewKeepAliveListener(l, "tcp", nil); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if tlsinfo != nil { + tlsinfo.CRLFile = grpcProxyListenCRL + if l, err = transport.NewTLSListener(l, tlsinfo); err != nil { + lg.Fatal("failed to create TLS listener", zap.Error(err)) + } + } + + lg.Info("listening for gRPC proxy client requests", zap.String("address", grpcProxyListenAddr)) + return cmux.New(l) +} + +func newGRPCProxyServer(lg *zap.Logger, client *clientv3.Client) *grpc.Server { + if grpcProxyEnableOrdering { + vf := ordering.NewOrderViolationSwitchEndpointClosure(client) + client.KV = ordering.NewKV(client.KV, vf) + lg.Info("waiting for linearized read from cluster to recover ordering") + for { + _, err := client.KV.Get(context.TODO(), "_", clientv3.WithKeysOnly()) + if err == nil { + break + } + lg.Warn("ordering recovery failed, retrying in 1s", zap.Error(err)) + time.Sleep(time.Second) + } + } + + if len(grpcProxyNamespace) > 0 { + client.KV = namespace.NewKV(client.KV, grpcProxyNamespace) + client.Watcher = namespace.NewWatcher(client.Watcher, grpcProxyNamespace) + client.Lease = namespace.NewLease(client.Lease, grpcProxyNamespace) + } + + if len(grpcProxyLeasing) > 0 { + client.KV, _, _ = leasing.NewKV(client, grpcProxyLeasing) + } + + kvp, _ := grpcproxy.NewKvProxy(client) + watchp, _ := 
grpcproxy.NewWatchProxy(client.Ctx(), lg, client) + if grpcProxyResolverPrefix != "" { + grpcproxy.Register(lg, client, grpcProxyResolverPrefix, grpcProxyAdvertiseClientURL, grpcProxyResolverTTL) + } + clusterp, _ := grpcproxy.NewClusterProxy(lg, client, grpcProxyAdvertiseClientURL, grpcProxyResolverPrefix) + leasep, _ := grpcproxy.NewLeaseProxy(client.Ctx(), client) + + mainp := grpcproxy.NewMaintenanceProxy(client) + authp := grpcproxy.NewAuthProxy(client) + electionp := grpcproxy.NewElectionProxy(client) + lockp := grpcproxy.NewLockProxy(client) + + gopts := []grpc.ServerOption{ + grpc.StreamInterceptor(grpc_prometheus.StreamServerInterceptor), + grpc.UnaryInterceptor(grpc_prometheus.UnaryServerInterceptor), + grpc.MaxConcurrentStreams(math.MaxUint32), + } + if grpcKeepAliveMinTime > time.Duration(0) { + gopts = append(gopts, grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ + MinTime: grpcKeepAliveMinTime, + PermitWithoutStream: false, + })) + } + if grpcKeepAliveInterval > time.Duration(0) || + grpcKeepAliveTimeout > time.Duration(0) { + gopts = append(gopts, grpc.KeepaliveParams(keepalive.ServerParameters{ + Time: grpcKeepAliveInterval, + Timeout: grpcKeepAliveTimeout, + })) + } + + server := grpc.NewServer(gopts...) 
+ + pb.RegisterKVServer(server, kvp) + pb.RegisterWatchServer(server, watchp) + pb.RegisterClusterServer(server, clusterp) + pb.RegisterLeaseServer(server, leasep) + pb.RegisterMaintenanceServer(server, mainp) + pb.RegisterAuthServer(server, authp) + v3electionpb.RegisterElectionServer(server, electionp) + v3lockpb.RegisterLockServer(server, lockp) + + return server +} + +func mustHTTPListener(lg *zap.Logger, m cmux.CMux, tlsinfo *transport.TLSInfo, c *clientv3.Client, proxy *clientv3.Client) (*http.Server, net.Listener) { + httpClient := mustNewHTTPClient(lg) + httpmux := http.NewServeMux() + httpmux.HandleFunc("/", http.NotFound) + grpcproxy.HandleMetrics(httpmux, httpClient, c.Endpoints()) + grpcproxy.HandleHealth(lg, httpmux, c) + grpcproxy.HandleProxyMetrics(httpmux) + grpcproxy.HandleProxyHealth(lg, httpmux, proxy) + if grpcProxyEnablePprof { + for p, h := range debugutil.PProfHandlers() { + httpmux.Handle(p, h) + } + lg.Info("gRPC proxy enabled pprof", zap.String("path", debugutil.HTTPPrefixPProf)) + } + srvhttp := &http.Server{ + Handler: httpmux, + ErrorLog: log.New(ioutil.Discard, "net/http", 0), + } + + if tlsinfo == nil { + return srvhttp, m.Match(cmux.HTTP1()) + } + + srvTLS, err := tlsinfo.ServerConfig() + if err != nil { + lg.Fatal("failed to set up TLS", zap.Error(err)) + } + srvhttp.TLSConfig = srvTLS + return srvhttp, m.Match(cmux.Any()) +} + +func mustNewHTTPClient(lg *zap.Logger) *http.Client { + transport, err := newHTTPTransport(grpcProxyCA, grpcProxyCert, grpcProxyKey) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + return &http.Client{Transport: transport} +} + +func newHTTPTransport(ca, cert, key string) (*http.Transport, error) { + tr := &http.Transport{} + + if ca != "" && cert != "" && key != "" { + caCert, err := ioutil.ReadFile(ca) + if err != nil { + return nil, err + } + keyPair, err := tls.LoadX509KeyPair(cert, key) + if err != nil { + return nil, err + } + caPool := x509.NewCertPool() + 
caPool.AppendCertsFromPEM(caCert) + + tlsConfig := &tls.Config{ + Certificates: []tls.Certificate{keyPair}, + RootCAs: caPool, + } + tlsConfig.BuildNameToCertificate() + tr.TLSClientConfig = tlsConfig + } else if grpcProxyInsecureSkipTLSVerify { + tlsConfig := &tls.Config{InsecureSkipVerify: grpcProxyInsecureSkipTLSVerify} + tr.TLSClientConfig = tlsConfig + } + return tr, nil +} + +func mustMetricsListener(lg *zap.Logger, tlsinfo *transport.TLSInfo) net.Listener { + murl, err := url.Parse(grpcProxyMetricsListenAddr) + if err != nil { + fmt.Fprintf(os.Stderr, "cannot parse %q", grpcProxyMetricsListenAddr) + os.Exit(1) + } + ml, err := transport.NewListener(murl.Host, murl.Scheme, tlsinfo) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + lg.Info("gRPC proxy listening for metrics", zap.String("address", murl.String())) + return ml +} diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/help.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/help.go new file mode 100644 index 0000000000..56ca5285c4 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/help.go @@ -0,0 +1,262 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdmain + +import ( + "fmt" + "strconv" + + cconfig "go.etcd.io/etcd/server/v3/config" + "go.etcd.io/etcd/server/v3/embed" + "golang.org/x/crypto/bcrypt" +) + +var ( + usageline = `Usage: + + etcd [flags] + Start an etcd server. 
+ + etcd --version + Show the version of etcd. + + etcd -h | --help + Show the help information about etcd. + + etcd --config-file + Path to the server configuration file. Note that if a configuration file is provided, other command line flags and environment variables will be ignored. + + etcd gateway + Run the stateless pass-through etcd TCP connection forwarding proxy. + + etcd grpc-proxy + Run the stateless etcd v3 gRPC L7 reverse proxy. +` + flagsline = ` +Member: + --name 'default' + Human-readable name for this member. + --data-dir '${name}.etcd' + Path to the data directory. + --wal-dir '' + Path to the dedicated wal directory. + --snapshot-count '100000' + Number of committed transactions to trigger a snapshot to disk. + --heartbeat-interval '100' + Time (in milliseconds) of a heartbeat interval. + --election-timeout '1000' + Time (in milliseconds) for an election to timeout. See tuning documentation for details. + --initial-election-tick-advance 'true' + Whether to fast-forward initial election ticks on boot for faster election. + --listen-peer-urls 'http://localhost:2380' + List of URLs to listen on for peer traffic. + --listen-client-urls 'http://localhost:2379' + List of URLs to listen on for client traffic. + --max-snapshots '` + strconv.Itoa(embed.DefaultMaxSnapshots) + `' + Maximum number of snapshot files to retain (0 is unlimited). + --max-wals '` + strconv.Itoa(embed.DefaultMaxWALs) + `' + Maximum number of wal files to retain (0 is unlimited). + --quota-backend-bytes '0' + Raise alarms when backend size exceeds the given quota (0 defaults to low space quota). + --backend-bbolt-freelist-type 'map' + BackendFreelistType specifies the type of freelist that boltdb backend uses(array and map are supported types). + --backend-batch-interval '' + BackendBatchInterval is the maximum time before commit the backend transaction. + --backend-batch-limit '0' + BackendBatchLimit is the maximum operations before commit the backend transaction. 
+ --max-txn-ops '128' + Maximum number of operations permitted in a transaction. + --max-request-bytes '1572864' + Maximum client request size in bytes the server will accept. + --grpc-keepalive-min-time '5s' + Minimum duration interval that a client should wait before pinging server. + --grpc-keepalive-interval '2h' + Frequency duration of server-to-client ping to check if a connection is alive (0 to disable). + --grpc-keepalive-timeout '20s' + Additional duration of wait before closing a non-responsive connection (0 to disable). + --socket-reuse-port 'false' + Enable to set socket option SO_REUSEPORT on listeners allowing rebinding of a port already in use. + --socket-reuse-address 'false' + Enable to set socket option SO_REUSEADDR on listeners allowing binding to an address in TIME_WAIT state. + +Clustering: + --initial-advertise-peer-urls 'http://localhost:2380' + List of this member's peer URLs to advertise to the rest of the cluster. + --initial-cluster 'default=http://localhost:2380' + Initial cluster configuration for bootstrapping. + --initial-cluster-state 'new' + Initial cluster state ('new' or 'existing'). + --initial-cluster-token 'etcd-cluster' + Initial cluster token for the etcd cluster during bootstrap. + Specifying this can protect you from unintended cross-cluster interaction when running multiple clusters. + --advertise-client-urls 'http://localhost:2379' + List of this member's client URLs to advertise to the public. + The client URLs advertised should be accessible to machines that talk to etcd cluster. etcd client libraries parse these URLs to connect to the cluster. + --discovery '' + Discovery URL used to bootstrap the cluster. + --discovery-fallback 'proxy' + Expected behavior ('exit' or 'proxy') when discovery services fails. + "proxy" supports v2 API only. + --discovery-proxy '' + HTTP proxy to use for traffic to discovery service. + --discovery-srv '' + DNS srv domain used to bootstrap the cluster. 
+ --discovery-srv-name '' + Suffix to the dns srv name queried when bootstrapping. + --strict-reconfig-check '` + strconv.FormatBool(embed.DefaultStrictReconfigCheck) + `' + Reject reconfiguration requests that would cause quorum loss. + --pre-vote 'true' + Enable to run an additional Raft election phase. + --auto-compaction-retention '0' + Auto compaction retention length. 0 means disable auto compaction. + --auto-compaction-mode 'periodic' + Interpret 'auto-compaction-retention' one of: periodic|revision. 'periodic' for duration based retention, defaulting to hours if no time unit is provided (e.g. '5m'). 'revision' for revision number based retention. + --enable-v2 '` + strconv.FormatBool(embed.DefaultEnableV2) + `' + Accept etcd V2 client requests. Deprecated and to be decommissioned in v3.6. + --v2-deprecation '` + string(cconfig.V2_DEPR_DEFAULT) + `' + Phase of v2store deprecation. Allows to opt-in for higher compatibility mode. + Supported values: + 'not-yet' // Issues a warning if v2store have meaningful content (default in v3.5) + 'write-only' // Custom v2 state is not allowed (planned default in v3.6) + 'write-only-drop-data' // Custom v2 state will get DELETED ! + 'gone' // v2store is not maintained any longer. (planned default in v3.7) + +Security: + --cert-file '' + Path to the client server TLS cert file. + --key-file '' + Path to the client server TLS key file. + --client-cert-auth 'false' + Enable client cert authentication. + --client-crl-file '' + Path to the client certificate revocation list file. + --client-cert-allowed-hostname '' + Allowed TLS hostname for client cert authentication. + --trusted-ca-file '' + Path to the client server TLS trusted CA cert file. + --auto-tls 'false' + Client TLS using generated certificates. + --peer-cert-file '' + Path to the peer server TLS cert file. + --peer-key-file '' + Path to the peer server TLS key file. + --peer-client-cert-auth 'false' + Enable peer client cert authentication. 
+ --peer-trusted-ca-file '' + Path to the peer server TLS trusted CA file. + --peer-cert-allowed-cn '' + Required CN for client certs connecting to the peer endpoint. + --peer-cert-allowed-hostname '' + Allowed TLS hostname for inter peer authentication. + --peer-auto-tls 'false' + Peer TLS using self-generated certificates if --peer-key-file and --peer-cert-file are not provided. + --self-signed-cert-validity '1' + The validity period of the client and peer certificates that are automatically generated by etcd when you specify ClientAutoTLS and PeerAutoTLS, the unit is year, and the default is 1. + --peer-crl-file '' + Path to the peer certificate revocation list file. + --cipher-suites '' + Comma-separated list of supported TLS cipher suites between client/server and peers (empty will be auto-populated by Go). + --cors '*' + Comma-separated whitelist of origins for CORS, or cross-origin resource sharing, (empty or * means allow all). + --host-whitelist '*' + Acceptable hostnames from HTTP client requests, if server is not secure (empty or * means allow all). + +Auth: + --auth-token 'simple' + Specify a v3 authentication token type and its options ('simple' or 'jwt'). + --bcrypt-cost ` + fmt.Sprintf("%d", bcrypt.DefaultCost) + ` + Specify the cost / strength of the bcrypt algorithm for hashing auth passwords. Valid values are between ` + fmt.Sprintf("%d", bcrypt.MinCost) + ` and ` + fmt.Sprintf("%d", bcrypt.MaxCost) + `. + --auth-token-ttl 300 + Time (in seconds) of the auth-token-ttl. + +Profiling and Monitoring: + --enable-pprof 'false' + Enable runtime profiling data via HTTP server. Address is at client URL + "/debug/pprof/" + --metrics 'basic' + Set level of detail for exported metrics, specify 'extensive' to include server side grpc histogram metrics. + --listen-metrics-urls '' + List of URLs to listen on for the metrics and health endpoints. + +Logging: + --logger 'zap' + Currently only supports 'zap' for structured logging. 
+ --log-outputs 'default' + Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd, or list of comma separated output targets. + --log-level 'info' + Configures log level. Only supports debug, info, warn, error, panic, or fatal. + --enable-log-rotation 'false' + Enable log rotation of a single log-outputs file target. + --log-rotation-config-json '{"maxsize": 100, "maxage": 0, "maxbackups": 0, "localtime": false, "compress": false}' + Configures log rotation if enabled with a JSON logger config. MaxSize(MB), MaxAge(days,0=no limit), MaxBackups(0=no limit), LocalTime(use computers local time), Compress(gzip)". + +Experimental distributed tracing: + --experimental-enable-distributed-tracing 'false' + Enable experimental distributed tracing. + --experimental-distributed-tracing-address 'localhost:4317' + Distributed tracing collector address. + --experimental-distributed-tracing-service-name 'etcd' + Distributed tracing service name, must be same across all etcd instances. + --experimental-distributed-tracing-instance-id '' + Distributed tracing instance ID, must be unique per each etcd instance. + +v2 Proxy (to be deprecated in v3.6): + --proxy 'off' + Proxy mode setting ('off', 'readonly' or 'on'). + --proxy-failure-wait 5000 + Time (in milliseconds) an endpoint will be held in a failed state. + --proxy-refresh-interval 30000 + Time (in milliseconds) of the endpoints refresh interval. + --proxy-dial-timeout 1000 + Time (in milliseconds) for a dial to timeout. + --proxy-write-timeout 5000 + Time (in milliseconds) for a write to timeout. + --proxy-read-timeout 0 + Time (in milliseconds) for a read to timeout. + +Experimental feature: + --experimental-initial-corrupt-check 'false' + Enable to check data corruption before serving any client/peer traffic. + --experimental-corrupt-check-time '0s' + Duration of time between cluster corruption check passes. 
+ --experimental-enable-v2v3 '' + Serve v2 requests through the v3 backend under a given prefix. Deprecated and to be decommissioned in v3.6. + --experimental-enable-lease-checkpoint 'false' + ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. + --experimental-compaction-batch-limit 1000 + ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch. + --experimental-peer-skip-client-san-verification 'false' + Skip verification of SAN field in client certificate for peer connections. + --experimental-watch-progress-notify-interval '10m' + Duration of periodical watch progress notification. + --experimental-warning-apply-duration '100ms' + Warning is generated if requests take more than this duration. + --experimental-txn-mode-write-with-shared-buffer 'true' + Enable the write transaction to use a shared buffer in its readonly check operations. + --experimental-bootstrap-defrag-threshold-megabytes + Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. Needs to be set to non-zero value to take effect. + --experimental-max-learners '1' + Set the max number of learner members allowed in the cluster membership. + +Unsafe feature: + --force-new-cluster 'false' + Force to create a new one-member cluster. + --unsafe-no-fsync 'false' + Disables fsync, unsafe, will cause data loss. + +CAUTIOUS with unsafe flag! It may break the guarantees given by the consensus protocol! 
+` +) + +// Add back "TO BE DEPRECATED" section if needed diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/main.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/main.go new file mode 100644 index 0000000000..e28e7da928 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/main.go @@ -0,0 +1,51 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdmain + +import ( + "fmt" + "os" + + "github.com/coreos/go-systemd/v22/daemon" + "go.uber.org/zap" +) + +func Main(args []string) { + checkSupportArch() + + if len(args) > 1 { + cmd := args[1] + switch cmd { + case "gateway", "grpc-proxy": + if err := rootCmd.Execute(); err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + return + } + } + + startEtcdOrProxyV2(args) +} + +func notifySystemd(lg *zap.Logger) { + lg.Info("notifying init daemon") + _, err := daemon.SdNotify(false, daemon.SdNotifyReady) + if err != nil { + lg.Error("failed to notify systemd for readiness", zap.Error(err)) + return + } + lg.Info("successfully notified init daemon") +} diff --git a/vendor/go.etcd.io/etcd/server/v3/etcdmain/util.go b/vendor/go.etcd.io/etcd/server/v3/etcdmain/util.go new file mode 100644 index 0000000000..0bd23e9e59 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/etcdmain/util.go @@ -0,0 +1,97 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdmain + +import ( + "fmt" + "os" + + "go.etcd.io/etcd/client/pkg/v3/srv" + "go.etcd.io/etcd/client/pkg/v3/transport" + + "go.uber.org/zap" +) + +func discoverEndpoints(lg *zap.Logger, dns string, ca string, insecure bool, serviceName string) (s srv.SRVClients) { + if dns == "" { + return s + } + srvs, err := srv.GetClient("etcd-client", dns, serviceName) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + endpoints := srvs.Endpoints + + if lg != nil { + lg.Info( + "discovered cluster from SRV", + zap.String("srv-server", dns), + zap.Strings("endpoints", endpoints), + ) + } + + if insecure { + return *srvs + } + // confirm TLS connections are good + tlsInfo := transport.TLSInfo{ + TrustedCAFile: ca, + ServerName: dns, + } + + if lg != nil { + lg.Info( + "validating discovered SRV endpoints", + zap.String("srv-server", dns), + zap.Strings("endpoints", endpoints), + ) + } + + endpoints, err = transport.ValidateSecureEndpoints(tlsInfo, endpoints) + if err != nil { + if lg != nil { + lg.Warn( + "failed to validate discovered endpoints", + zap.String("srv-server", dns), + zap.Strings("endpoints", endpoints), + zap.Error(err), + ) + } + } else { + if lg != nil { + lg.Info( + "using validated discovered SRV endpoints", + zap.String("srv-server", dns), + zap.Strings("endpoints", endpoints), + ) + } + } + + // map endpoints back to SRVClients struct with SRV data + eps := make(map[string]struct{}) + for _, ep := range endpoints { + eps[ep] = struct{}{} + } + for i := range srvs.Endpoints { + if _, ok := 
eps[srvs.Endpoints[i]]; !ok { + continue + } + s.Endpoints = append(s.Endpoints, srvs.Endpoints[i]) + s.SRVs = append(s.SRVs, srvs.SRVs[i]) + } + + return s +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/auth.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/auth.go new file mode 100644 index 0000000000..0cabfc146e --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/auth.go @@ -0,0 +1,115 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package grpcproxy + +import ( + "context" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/v3" +) + +type AuthProxy struct { + client *clientv3.Client +} + +func NewAuthProxy(c *clientv3.Client) pb.AuthServer { + return &AuthProxy{client: c} +} + +func (ap *AuthProxy) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).AuthEnable(ctx, r) +} + +func (ap *AuthProxy) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).AuthDisable(ctx, r) +} + +func (ap *AuthProxy) AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).AuthStatus(ctx, r) +} + +func (ap *AuthProxy) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).Authenticate(ctx, r) +} + +func (ap *AuthProxy) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleAdd(ctx, r) +} + +func (ap *AuthProxy) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleDelete(ctx, r) +} + +func (ap *AuthProxy) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleGet(ctx, r) +} + +func (ap *AuthProxy) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleList(ctx, r) +} + +func (ap *AuthProxy) RoleRevokePermission(ctx 
context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleRevokePermission(ctx, r) +} + +func (ap *AuthProxy) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).RoleGrantPermission(ctx, r) +} + +func (ap *AuthProxy) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserAdd(ctx, r) +} + +func (ap *AuthProxy) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserDelete(ctx, r) +} + +func (ap *AuthProxy) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserGet(ctx, r) +} + +func (ap *AuthProxy) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserList(ctx, r) +} + +func (ap *AuthProxy) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserGrantRole(ctx, r) +} + +func (ap *AuthProxy) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserRevokeRole(ctx, r) +} + +func (ap *AuthProxy) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) { + conn := ap.client.ActiveConnection() + return pb.NewAuthClient(conn).UserChangePassword(ctx, r) +} 
diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache/store.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache/store.go new file mode 100644 index 0000000000..2c189644a8 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache/store.go @@ -0,0 +1,172 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cache exports functionality for efficiently caching and mapping +// `RangeRequest`s to corresponding `RangeResponse`s. +package cache + +import ( + "errors" + "sync" + + "github.com/golang/groupcache/lru" + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + "go.etcd.io/etcd/pkg/v3/adt" +) + +var ( + DefaultMaxEntries = 2048 + ErrCompacted = rpctypes.ErrGRPCCompacted +) + +type Cache interface { + Add(req *pb.RangeRequest, resp *pb.RangeResponse) + Get(req *pb.RangeRequest) (*pb.RangeResponse, error) + Compact(revision int64) + Invalidate(key []byte, endkey []byte) + Size() int + Close() +} + +// keyFunc returns the key of a request, which is used to look up its caching response in the cache. 
+func keyFunc(req *pb.RangeRequest) string { + // TODO: use marshalTo to reduce allocation + b, err := req.Marshal() + if err != nil { + panic(err) + } + return string(b) +} + +func NewCache(maxCacheEntries int) Cache { + return &cache{ + lru: lru.New(maxCacheEntries), + cachedRanges: adt.NewIntervalTree(), + compactedRev: -1, + } +} + +func (c *cache) Close() {} + +// cache implements Cache +type cache struct { + mu sync.RWMutex + lru *lru.Cache + + // a reverse index for cache invalidation + cachedRanges adt.IntervalTree + + compactedRev int64 +} + +// Add adds the response of a request to the cache if its revision is larger than the compacted revision of the cache. +func (c *cache) Add(req *pb.RangeRequest, resp *pb.RangeResponse) { + key := keyFunc(req) + + c.mu.Lock() + defer c.mu.Unlock() + + if req.Revision > c.compactedRev { + c.lru.Add(key, resp) + } + // we do not need to invalidate a request with a revision specified. + // so we do not need to add it into the reverse index. + if req.Revision != 0 { + return + } + + var ( + iv *adt.IntervalValue + ivl adt.Interval + ) + if len(req.RangeEnd) != 0 { + ivl = adt.NewStringAffineInterval(string(req.Key), string(req.RangeEnd)) + } else { + ivl = adt.NewStringAffinePoint(string(req.Key)) + } + + iv = c.cachedRanges.Find(ivl) + + if iv == nil { + val := map[string]struct{}{key: {}} + c.cachedRanges.Insert(ivl, val) + } else { + val := iv.Val.(map[string]struct{}) + val[key] = struct{}{} + iv.Val = val + } +} + +// Get looks up the caching response for a given request. +// Get is also responsible for lazy eviction when accessing compacted entries. 
+func (c *cache) Get(req *pb.RangeRequest) (*pb.RangeResponse, error) { + key := keyFunc(req) + + c.mu.Lock() + defer c.mu.Unlock() + + if req.Revision > 0 && req.Revision < c.compactedRev { + c.lru.Remove(key) + return nil, ErrCompacted + } + + if resp, ok := c.lru.Get(key); ok { + return resp.(*pb.RangeResponse), nil + } + return nil, errors.New("not exist") +} + +// Invalidate invalidates the cache entries that intersecting with the given range from key to endkey. +func (c *cache) Invalidate(key, endkey []byte) { + c.mu.Lock() + defer c.mu.Unlock() + + var ( + ivs []*adt.IntervalValue + ivl adt.Interval + ) + if len(endkey) == 0 { + ivl = adt.NewStringAffinePoint(string(key)) + } else { + ivl = adt.NewStringAffineInterval(string(key), string(endkey)) + } + + ivs = c.cachedRanges.Stab(ivl) + for _, iv := range ivs { + keys := iv.Val.(map[string]struct{}) + for key := range keys { + c.lru.Remove(key) + } + } + // delete after removing all keys since it is destructive to 'ivs' + c.cachedRanges.Delete(ivl) +} + +// Compact invalidate all caching response before the given rev. +// Replace with the invalidation is lazy. The actual removal happens when the entries is accessed. +func (c *cache) Compact(revision int64) { + c.mu.Lock() + defer c.mu.Unlock() + + if revision > c.compactedRev { + c.compactedRev = revision + } +} + +func (c *cache) Size() int { + c.mu.RLock() + defer c.mu.RUnlock() + return c.lru.Len() +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cluster.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cluster.go new file mode 100644 index 0000000000..1f7dccbe74 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/cluster.go @@ -0,0 +1,213 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + "errors" + "fmt" + "os" + "sync" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/client/v3/naming/endpoints" + "golang.org/x/time/rate" + + "go.uber.org/zap" +) + +// allow maximum 1 retry per second +const resolveRetryRate = 1 + +type clusterProxy struct { + lg *zap.Logger + clus clientv3.Cluster + ctx context.Context + + // advertise client URL + advaddr string + prefix string + + em endpoints.Manager + + umu sync.RWMutex + umap map[string]endpoints.Endpoint +} + +// NewClusterProxy takes optional prefix to fetch grpc-proxy member endpoints. +// The returned channel is closed when there is grpc-proxy endpoint registered +// and the client's context is canceled so the 'register' loop returns. 
+// TODO: Expand the API to report creation errors +func NewClusterProxy(lg *zap.Logger, c *clientv3.Client, advaddr string, prefix string) (pb.ClusterServer, <-chan struct{}) { + if lg == nil { + lg = zap.NewNop() + } + + var em endpoints.Manager + if advaddr != "" && prefix != "" { + var err error + if em, err = endpoints.NewManager(c, prefix); err != nil { + lg.Error("failed to provision endpointsManager", zap.String("prefix", prefix), zap.Error(err)) + return nil, nil + } + } + + cp := &clusterProxy{ + lg: lg, + clus: c.Cluster, + ctx: c.Ctx(), + + advaddr: advaddr, + prefix: prefix, + umap: make(map[string]endpoints.Endpoint), + em: em, + } + + donec := make(chan struct{}) + if em != nil { + go func() { + defer close(donec) + cp.establishEndpointWatch(prefix) + }() + return cp, donec + } + + close(donec) + return cp, donec +} + +func (cp *clusterProxy) establishEndpointWatch(prefix string) { + rm := rate.NewLimiter(rate.Limit(resolveRetryRate), resolveRetryRate) + for rm.Wait(cp.ctx) == nil { + wc, err := cp.em.NewWatchChannel(cp.ctx) + if err != nil { + cp.lg.Warn("failed to establish endpoint watch", zap.String("prefix", prefix), zap.Error(err)) + continue + } + cp.monitor(wc) + } +} + +func (cp *clusterProxy) monitor(wa endpoints.WatchChannel) { + for { + select { + case <-cp.ctx.Done(): + cp.lg.Info("watching endpoints interrupted", zap.Error(cp.ctx.Err())) + return + case updates := <-wa: + cp.umu.Lock() + for _, up := range updates { + switch up.Op { + case endpoints.Add: + cp.umap[up.Endpoint.Addr] = up.Endpoint + case endpoints.Delete: + delete(cp.umap, up.Endpoint.Addr) + } + } + cp.umu.Unlock() + } + } +} + +func (cp *clusterProxy) MemberAdd(ctx context.Context, r *pb.MemberAddRequest) (*pb.MemberAddResponse, error) { + if r.IsLearner { + return cp.memberAddAsLearner(ctx, r.PeerURLs) + } + return cp.memberAdd(ctx, r.PeerURLs) +} + +func (cp *clusterProxy) memberAdd(ctx context.Context, peerURLs []string) (*pb.MemberAddResponse, error) { + mresp, err 
:= cp.clus.MemberAdd(ctx, peerURLs) + if err != nil { + return nil, err + } + resp := (pb.MemberAddResponse)(*mresp) + return &resp, err +} + +func (cp *clusterProxy) memberAddAsLearner(ctx context.Context, peerURLs []string) (*pb.MemberAddResponse, error) { + mresp, err := cp.clus.MemberAddAsLearner(ctx, peerURLs) + if err != nil { + return nil, err + } + resp := (pb.MemberAddResponse)(*mresp) + return &resp, err +} + +func (cp *clusterProxy) MemberRemove(ctx context.Context, r *pb.MemberRemoveRequest) (*pb.MemberRemoveResponse, error) { + mresp, err := cp.clus.MemberRemove(ctx, r.ID) + if err != nil { + return nil, err + } + resp := (pb.MemberRemoveResponse)(*mresp) + return &resp, err +} + +func (cp *clusterProxy) MemberUpdate(ctx context.Context, r *pb.MemberUpdateRequest) (*pb.MemberUpdateResponse, error) { + mresp, err := cp.clus.MemberUpdate(ctx, r.ID, r.PeerURLs) + if err != nil { + return nil, err + } + resp := (pb.MemberUpdateResponse)(*mresp) + return &resp, err +} + +func (cp *clusterProxy) membersFromUpdates() ([]*pb.Member, error) { + cp.umu.RLock() + defer cp.umu.RUnlock() + mbs := make([]*pb.Member, 0, len(cp.umap)) + for addr, upt := range cp.umap { + m, err := decodeMeta(fmt.Sprint(upt.Metadata)) + if err != nil { + return nil, err + } + mbs = append(mbs, &pb.Member{Name: m.Name, ClientURLs: []string{addr}}) + } + return mbs, nil +} + +// MemberList wraps member list API with following rules: +// - If 'advaddr' is not empty and 'prefix' is not empty, return registered member lists via resolver +// - If 'advaddr' is not empty and 'prefix' is not empty and registered grpc-proxy members haven't been fetched, return the 'advaddr' +// - If 'advaddr' is not empty and 'prefix' is empty, return 'advaddr' without forcing it to 'register' +// - If 'advaddr' is empty, forward to member list API +func (cp *clusterProxy) MemberList(ctx context.Context, r *pb.MemberListRequest) (*pb.MemberListResponse, error) { + if cp.advaddr != "" { + if cp.prefix != "" { + 
mbs, err := cp.membersFromUpdates() + if err != nil { + return nil, err + } + if len(mbs) > 0 { + return &pb.MemberListResponse{Members: mbs}, nil + } + } + // prefix is empty or no grpc-proxy members haven't been registered + hostname, _ := os.Hostname() + return &pb.MemberListResponse{Members: []*pb.Member{{Name: hostname, ClientURLs: []string{cp.advaddr}}}}, nil + } + mresp, err := cp.clus.MemberList(ctx) + if err != nil { + return nil, err + } + resp := (pb.MemberListResponse)(*mresp) + return &resp, err +} + +func (cp *clusterProxy) MemberPromote(ctx context.Context, r *pb.MemberPromoteRequest) (*pb.MemberPromoteResponse, error) { + // TODO: implement + return nil, errors.New("not implemented") +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/doc.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/doc.go new file mode 100644 index 0000000000..fc022e3c57 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/doc.go @@ -0,0 +1,16 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package grpcproxy is an OSI level 7 proxy for etcd v3 API requests. 
+package grpcproxy diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/election.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/election.go new file mode 100644 index 0000000000..a9ec0fddd3 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/election.go @@ -0,0 +1,65 @@ +// Copyright 2017 The etcd Lockors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/server/v3/etcdserver/api/v3election/v3electionpb" +) + +type electionProxy struct { + client *clientv3.Client +} + +func NewElectionProxy(client *clientv3.Client) v3electionpb.ElectionServer { + return &electionProxy{client: client} +} + +func (ep *electionProxy) Campaign(ctx context.Context, req *v3electionpb.CampaignRequest) (*v3electionpb.CampaignResponse, error) { + return v3electionpb.NewElectionClient(ep.client.ActiveConnection()).Campaign(ctx, req) +} + +func (ep *electionProxy) Proclaim(ctx context.Context, req *v3electionpb.ProclaimRequest) (*v3electionpb.ProclaimResponse, error) { + return v3electionpb.NewElectionClient(ep.client.ActiveConnection()).Proclaim(ctx, req) +} + +func (ep *electionProxy) Leader(ctx context.Context, req *v3electionpb.LeaderRequest) (*v3electionpb.LeaderResponse, error) { + return v3electionpb.NewElectionClient(ep.client.ActiveConnection()).Leader(ctx, req) +} + +func (ep *electionProxy) Observe(req *v3electionpb.LeaderRequest, s 
v3electionpb.Election_ObserveServer) error { + conn := ep.client.ActiveConnection() + ctx, cancel := context.WithCancel(s.Context()) + defer cancel() + sc, err := v3electionpb.NewElectionClient(conn).Observe(ctx, req) + if err != nil { + return err + } + for { + rr, err := sc.Recv() + if err != nil { + return err + } + if err = s.Send(rr); err != nil { + return err + } + } +} + +func (ep *electionProxy) Resign(ctx context.Context, req *v3electionpb.ResignRequest) (*v3electionpb.ResignResponse, error) { + return v3electionpb.NewElectionClient(ep.client.ActiveConnection()).Resign(ctx, req) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/health.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/health.go new file mode 100644 index 0000000000..882af4b46a --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/health.go @@ -0,0 +1,76 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + "fmt" + "net/http" + "time" + + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp" + "go.uber.org/zap" +) + +// HandleHealth registers health handler on '/health'. 
+func HandleHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) { + if lg == nil { + lg = zap.NewNop() + } + mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkHealth(c) })) +} + +// HandleProxyHealth registers health handler on '/proxy/health'. +func HandleProxyHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) { + if lg == nil { + lg = zap.NewNop() + } + mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkProxyHealth(c) })) +} + +func checkHealth(c *clientv3.Client) etcdhttp.Health { + h := etcdhttp.Health{Health: "false"} + ctx, cancel := context.WithTimeout(c.Ctx(), time.Second) + _, err := c.Get(ctx, "a") + cancel() + if err == nil || err == rpctypes.ErrPermissionDenied { + h.Health = "true" + } else { + h.Reason = fmt.Sprintf("GET ERROR:%s", err) + } + return h +} + +func checkProxyHealth(c *clientv3.Client) etcdhttp.Health { + if c == nil { + return etcdhttp.Health{Health: "false", Reason: "no connection to proxy"} + } + h := checkHealth(c) + if h.Health != "true" { + return h + } + ctx, cancel := context.WithTimeout(c.Ctx(), time.Second*3) + ch := c.Watch(ctx, "a", clientv3.WithCreatedNotify()) + select { + case <-ch: + case <-ctx.Done(): + h.Health = "false" + h.Reason = "WATCH TIMEOUT" + } + cancel() + return h +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/kv.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/kv.go new file mode 100644 index 0000000000..6e88eb9fb9 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/kv.go @@ -0,0 +1,232 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache" +) + +type kvProxy struct { + kv clientv3.KV + cache cache.Cache +} + +func NewKvProxy(c *clientv3.Client) (pb.KVServer, <-chan struct{}) { + kv := &kvProxy{ + kv: c.KV, + cache: cache.NewCache(cache.DefaultMaxEntries), + } + donec := make(chan struct{}) + close(donec) + return kv, donec +} + +func (p *kvProxy) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) { + if r.Serializable { + resp, err := p.cache.Get(r) + switch err { + case nil: + cacheHits.Inc() + return resp, nil + case cache.ErrCompacted: + cacheHits.Inc() + return nil, err + } + + cachedMisses.Inc() + } + + resp, err := p.kv.Do(ctx, RangeRequestToOp(r)) + if err != nil { + return nil, err + } + + // cache linearizable as serializable + req := *r + req.Serializable = true + gresp := (*pb.RangeResponse)(resp.Get()) + p.cache.Add(&req, gresp) + cacheKeys.Set(float64(p.cache.Size())) + + return gresp, nil +} + +func (p *kvProxy) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) { + p.cache.Invalidate(r.Key, nil) + cacheKeys.Set(float64(p.cache.Size())) + + resp, err := p.kv.Do(ctx, PutRequestToOp(r)) + return (*pb.PutResponse)(resp.Put()), err +} + +func (p *kvProxy) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) { + p.cache.Invalidate(r.Key, r.RangeEnd) + cacheKeys.Set(float64(p.cache.Size())) + + resp, err := p.kv.Do(ctx, 
DelRequestToOp(r)) + return (*pb.DeleteRangeResponse)(resp.Del()), err +} + +func (p *kvProxy) txnToCache(reqs []*pb.RequestOp, resps []*pb.ResponseOp) { + for i := range resps { + switch tv := resps[i].Response.(type) { + case *pb.ResponseOp_ResponsePut: + p.cache.Invalidate(reqs[i].GetRequestPut().Key, nil) + case *pb.ResponseOp_ResponseDeleteRange: + rdr := reqs[i].GetRequestDeleteRange() + p.cache.Invalidate(rdr.Key, rdr.RangeEnd) + case *pb.ResponseOp_ResponseRange: + req := *(reqs[i].GetRequestRange()) + req.Serializable = true + p.cache.Add(&req, tv.ResponseRange) + } + } +} + +func (p *kvProxy) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) { + op := TxnRequestToOp(r) + opResp, err := p.kv.Do(ctx, op) + if err != nil { + return nil, err + } + resp := opResp.Txn() + + // txn may claim an outdated key is updated; be safe and invalidate + for _, cmp := range r.Compare { + p.cache.Invalidate(cmp.Key, cmp.RangeEnd) + } + // update any fetched keys + if resp.Succeeded { + p.txnToCache(r.Success, resp.Responses) + } else { + p.txnToCache(r.Failure, resp.Responses) + } + + cacheKeys.Set(float64(p.cache.Size())) + + return (*pb.TxnResponse)(resp), nil +} + +func (p *kvProxy) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) { + var opts []clientv3.CompactOption + if r.Physical { + opts = append(opts, clientv3.WithCompactPhysical()) + } + + resp, err := p.kv.Compact(ctx, r.Revision, opts...) 
+ if err == nil { + p.cache.Compact(r.Revision) + } + + cacheKeys.Set(float64(p.cache.Size())) + + return (*pb.CompactionResponse)(resp), err +} + +func requestOpToOp(union *pb.RequestOp) clientv3.Op { + switch tv := union.Request.(type) { + case *pb.RequestOp_RequestRange: + if tv.RequestRange != nil { + return RangeRequestToOp(tv.RequestRange) + } + case *pb.RequestOp_RequestPut: + if tv.RequestPut != nil { + return PutRequestToOp(tv.RequestPut) + } + case *pb.RequestOp_RequestDeleteRange: + if tv.RequestDeleteRange != nil { + return DelRequestToOp(tv.RequestDeleteRange) + } + case *pb.RequestOp_RequestTxn: + if tv.RequestTxn != nil { + return TxnRequestToOp(tv.RequestTxn) + } + } + panic("unknown request") +} + +func RangeRequestToOp(r *pb.RangeRequest) clientv3.Op { + opts := []clientv3.OpOption{} + if len(r.RangeEnd) != 0 { + opts = append(opts, clientv3.WithRange(string(r.RangeEnd))) + } + opts = append(opts, clientv3.WithRev(r.Revision)) + opts = append(opts, clientv3.WithLimit(r.Limit)) + opts = append(opts, clientv3.WithSort( + clientv3.SortTarget(r.SortTarget), + clientv3.SortOrder(r.SortOrder)), + ) + opts = append(opts, clientv3.WithMaxCreateRev(r.MaxCreateRevision)) + opts = append(opts, clientv3.WithMinCreateRev(r.MinCreateRevision)) + opts = append(opts, clientv3.WithMaxModRev(r.MaxModRevision)) + opts = append(opts, clientv3.WithMinModRev(r.MinModRevision)) + if r.CountOnly { + opts = append(opts, clientv3.WithCountOnly()) + } + if r.KeysOnly { + opts = append(opts, clientv3.WithKeysOnly()) + } + if r.Serializable { + opts = append(opts, clientv3.WithSerializable()) + } + + return clientv3.OpGet(string(r.Key), opts...) 
+} + +func PutRequestToOp(r *pb.PutRequest) clientv3.Op { + opts := []clientv3.OpOption{} + opts = append(opts, clientv3.WithLease(clientv3.LeaseID(r.Lease))) + if r.IgnoreValue { + opts = append(opts, clientv3.WithIgnoreValue()) + } + if r.IgnoreLease { + opts = append(opts, clientv3.WithIgnoreLease()) + } + if r.PrevKv { + opts = append(opts, clientv3.WithPrevKV()) + } + return clientv3.OpPut(string(r.Key), string(r.Value), opts...) +} + +func DelRequestToOp(r *pb.DeleteRangeRequest) clientv3.Op { + opts := []clientv3.OpOption{} + if len(r.RangeEnd) != 0 { + opts = append(opts, clientv3.WithRange(string(r.RangeEnd))) + } + if r.PrevKv { + opts = append(opts, clientv3.WithPrevKV()) + } + return clientv3.OpDelete(string(r.Key), opts...) +} + +func TxnRequestToOp(r *pb.TxnRequest) clientv3.Op { + cmps := make([]clientv3.Cmp, len(r.Compare)) + thenops := make([]clientv3.Op, len(r.Success)) + elseops := make([]clientv3.Op, len(r.Failure)) + for i := range r.Compare { + cmps[i] = (clientv3.Cmp)(*r.Compare[i]) + } + for i := range r.Success { + thenops[i] = requestOpToOp(r.Success[i]) + } + for i := range r.Failure { + elseops[i] = requestOpToOp(r.Failure[i]) + } + return clientv3.OpTxn(cmps, thenops, elseops) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/leader.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/leader.go new file mode 100644 index 0000000000..c078c89bb3 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/leader.go @@ -0,0 +1,113 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + "math" + "sync" + + "go.etcd.io/etcd/client/v3" + + "golang.org/x/time/rate" +) + +const ( + lostLeaderKey = "__lostleader" // watched to detect leader loss + retryPerSecond = 10 +) + +type leader struct { + ctx context.Context + w clientv3.Watcher + // mu protects leaderc updates. + mu sync.RWMutex + leaderc chan struct{} + disconnc chan struct{} + donec chan struct{} +} + +func newLeader(ctx context.Context, w clientv3.Watcher) *leader { + l := &leader{ + ctx: clientv3.WithRequireLeader(ctx), + w: w, + leaderc: make(chan struct{}), + disconnc: make(chan struct{}), + donec: make(chan struct{}), + } + // begin assuming leader is lost + close(l.leaderc) + go l.recvLoop() + return l +} + +func (l *leader) recvLoop() { + defer close(l.donec) + + limiter := rate.NewLimiter(rate.Limit(retryPerSecond), retryPerSecond) + rev := int64(math.MaxInt64 - 2) + for limiter.Wait(l.ctx) == nil { + wch := l.w.Watch(l.ctx, lostLeaderKey, clientv3.WithRev(rev), clientv3.WithCreatedNotify()) + cresp, ok := <-wch + if !ok { + l.loseLeader() + continue + } + if cresp.Err() != nil { + l.loseLeader() + if clientv3.IsConnCanceled(cresp.Err()) { + close(l.disconnc) + return + } + continue + } + l.gotLeader() + <-wch + l.loseLeader() + } +} + +func (l *leader) loseLeader() { + l.mu.RLock() + defer l.mu.RUnlock() + select { + case <-l.leaderc: + default: + close(l.leaderc) + } +} + +// gotLeader will force update the leadership status to having a leader. 
+func (l *leader) gotLeader() { + l.mu.Lock() + defer l.mu.Unlock() + select { + case <-l.leaderc: + l.leaderc = make(chan struct{}) + default: + } +} + +func (l *leader) disconnectNotify() <-chan struct{} { return l.disconnc } + +func (l *leader) stopNotify() <-chan struct{} { return l.donec } + +// lostNotify returns a channel that is closed if there has been +// a leader loss not yet followed by a leader reacquire. +func (l *leader) lostNotify() <-chan struct{} { + l.mu.RLock() + defer l.mu.RUnlock() + return l.leaderc +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lease.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lease.go new file mode 100644 index 0000000000..875256c432 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lease.go @@ -0,0 +1,384 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + "io" + "sync" + "sync/atomic" + "time" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + "go.etcd.io/etcd/client/v3" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +type leaseProxy struct { + // leaseClient handles req from LeaseGrant() that requires a lease ID. 
+ leaseClient pb.LeaseClient + + lessor clientv3.Lease + + ctx context.Context + + leader *leader + + // mu protects adding outstanding leaseProxyStream through wg. + mu sync.RWMutex + + // wg waits until all outstanding leaseProxyStream quit. + wg sync.WaitGroup +} + +func NewLeaseProxy(ctx context.Context, c *clientv3.Client) (pb.LeaseServer, <-chan struct{}) { + cctx, cancel := context.WithCancel(ctx) + lp := &leaseProxy{ + leaseClient: pb.NewLeaseClient(c.ActiveConnection()), + lessor: c.Lease, + ctx: cctx, + leader: newLeader(cctx, c.Watcher), + } + ch := make(chan struct{}) + go func() { + defer close(ch) + <-lp.leader.stopNotify() + lp.mu.Lock() + select { + case <-lp.ctx.Done(): + case <-lp.leader.disconnectNotify(): + cancel() + } + <-lp.ctx.Done() + lp.mu.Unlock() + lp.wg.Wait() + }() + return lp, ch +} + +func (lp *leaseProxy) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) { + rp, err := lp.leaseClient.LeaseGrant(ctx, cr, grpc.WaitForReady(true)) + if err != nil { + return nil, err + } + lp.leader.gotLeader() + return rp, nil +} + +func (lp *leaseProxy) LeaseRevoke(ctx context.Context, rr *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) { + r, err := lp.lessor.Revoke(ctx, clientv3.LeaseID(rr.ID)) + if err != nil { + return nil, err + } + lp.leader.gotLeader() + return (*pb.LeaseRevokeResponse)(r), nil +} + +func (lp *leaseProxy) LeaseTimeToLive(ctx context.Context, rr *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) { + var ( + r *clientv3.LeaseTimeToLiveResponse + err error + ) + if rr.Keys { + r, err = lp.lessor.TimeToLive(ctx, clientv3.LeaseID(rr.ID), clientv3.WithAttachedKeys()) + } else { + r, err = lp.lessor.TimeToLive(ctx, clientv3.LeaseID(rr.ID)) + } + if err != nil { + return nil, err + } + rp := &pb.LeaseTimeToLiveResponse{ + Header: r.ResponseHeader, + ID: int64(r.ID), + TTL: r.TTL, + GrantedTTL: r.GrantedTTL, + Keys: r.Keys, + } + return rp, err +} + +func (lp *leaseProxy) 
LeaseLeases(ctx context.Context, rr *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) { + r, err := lp.lessor.Leases(ctx) + if err != nil { + return nil, err + } + leases := make([]*pb.LeaseStatus, len(r.Leases)) + for i := range r.Leases { + leases[i] = &pb.LeaseStatus{ID: int64(r.Leases[i].ID)} + } + rp := &pb.LeaseLeasesResponse{ + Header: r.ResponseHeader, + Leases: leases, + } + return rp, err +} + +func (lp *leaseProxy) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) error { + lp.mu.Lock() + select { + case <-lp.ctx.Done(): + lp.mu.Unlock() + return lp.ctx.Err() + default: + lp.wg.Add(1) + } + lp.mu.Unlock() + + ctx, cancel := context.WithCancel(stream.Context()) + lps := leaseProxyStream{ + stream: stream, + lessor: lp.lessor, + keepAliveLeases: make(map[int64]*atomicCounter), + respc: make(chan *pb.LeaseKeepAliveResponse), + ctx: ctx, + cancel: cancel, + } + + errc := make(chan error, 2) + + var lostLeaderC <-chan struct{} + if md, ok := metadata.FromOutgoingContext(stream.Context()); ok { + v := md[rpctypes.MetadataRequireLeaderKey] + if len(v) > 0 && v[0] == rpctypes.MetadataHasLeader { + lostLeaderC = lp.leader.lostNotify() + // if leader is known to be lost at creation time, avoid + // letting events through at all + select { + case <-lostLeaderC: + lp.wg.Done() + return rpctypes.ErrNoLeader + default: + } + } + } + stopc := make(chan struct{}, 3) + go func() { + defer func() { stopc <- struct{}{} }() + if err := lps.recvLoop(); err != nil { + errc <- err + } + }() + + go func() { + defer func() { stopc <- struct{}{} }() + if err := lps.sendLoop(); err != nil { + errc <- err + } + }() + + // tears down LeaseKeepAlive stream if leader goes down or entire leaseProxy is terminated. 
+ go func() { + defer func() { stopc <- struct{}{} }() + select { + case <-lostLeaderC: + case <-ctx.Done(): + case <-lp.ctx.Done(): + } + }() + + var err error + select { + case <-stopc: + stopc <- struct{}{} + case err = <-errc: + } + cancel() + + // recv/send may only shutdown after function exits; + // this goroutine notifies lease proxy that the stream is through + go func() { + <-stopc + <-stopc + <-stopc + lps.close() + close(errc) + lp.wg.Done() + }() + + select { + case <-lostLeaderC: + return rpctypes.ErrNoLeader + case <-lp.leader.disconnectNotify(): + return status.Error(codes.Canceled, "the client connection is closing") + default: + if err != nil { + return err + } + return ctx.Err() + } +} + +type leaseProxyStream struct { + stream pb.Lease_LeaseKeepAliveServer + + lessor clientv3.Lease + // wg tracks keepAliveLoop goroutines + wg sync.WaitGroup + // mu protects keepAliveLeases + mu sync.RWMutex + // keepAliveLeases tracks how many outstanding keepalive requests which need responses are on a lease. 
+ keepAliveLeases map[int64]*atomicCounter + // respc receives lease keepalive responses from etcd backend + respc chan *pb.LeaseKeepAliveResponse + + ctx context.Context + cancel context.CancelFunc +} + +func (lps *leaseProxyStream) recvLoop() error { + for { + rr, err := lps.stream.Recv() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + lps.mu.Lock() + neededResps, ok := lps.keepAliveLeases[rr.ID] + if !ok { + neededResps = &atomicCounter{} + lps.keepAliveLeases[rr.ID] = neededResps + lps.wg.Add(1) + go func() { + defer lps.wg.Done() + if err := lps.keepAliveLoop(rr.ID, neededResps); err != nil { + lps.cancel() + } + }() + } + neededResps.add(1) + lps.mu.Unlock() + } +} + +func (lps *leaseProxyStream) keepAliveLoop(leaseID int64, neededResps *atomicCounter) error { + cctx, ccancel := context.WithCancel(lps.ctx) + defer ccancel() + respc, err := lps.lessor.KeepAlive(cctx, clientv3.LeaseID(leaseID)) + if err != nil { + return err + } + // ticker expires when loop hasn't received keepalive within TTL + var ticker <-chan time.Time + for { + select { + case <-ticker: + lps.mu.Lock() + // if there are outstanding keepAlive reqs at the moment of ticker firing, + // don't close keepAliveLoop(), let it continuing to process the KeepAlive reqs. 
// atomicCounter is an int64 counter whose reads and updates go through
// sync/atomic. The zero value is a counter at 0.
type atomicCounter struct{ counter int64 }

// add atomically adjusts the counter by delta (which may be negative).
func (ac *atomicCounter) add(delta int64) {
	_ = atomic.AddInt64(&ac.counter, delta)
}

// get atomically loads the current value.
func (ac *atomicCounter) get() (current int64) {
	current = atomic.LoadInt64(&ac.counter)
	return current
}
a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lock.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lock.go new file mode 100644 index 0000000000..9859b93699 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/lock.go @@ -0,0 +1,38 @@ +// Copyright 2017 The etcd Lockors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb" +) + +type lockProxy struct { + client *clientv3.Client +} + +func NewLockProxy(client *clientv3.Client) v3lockpb.LockServer { + return &lockProxy{client: client} +} + +func (lp *lockProxy) Lock(ctx context.Context, req *v3lockpb.LockRequest) (*v3lockpb.LockResponse, error) { + return v3lockpb.NewLockClient(lp.client.ActiveConnection()).Lock(ctx, req) +} + +func (lp *lockProxy) Unlock(ctx context.Context, req *v3lockpb.UnlockRequest) (*v3lockpb.UnlockResponse, error) { + return v3lockpb.NewLockClient(lp.client.ActiveConnection()).Unlock(ctx, req) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/maintenance.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/maintenance.go new file mode 100644 index 0000000000..3e81656259 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/maintenance.go @@ -0,0 +1,95 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "context" + "io" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/client/v3" +) + +type maintenanceProxy struct { + client *clientv3.Client +} + +func NewMaintenanceProxy(c *clientv3.Client) pb.MaintenanceServer { + return &maintenanceProxy{ + client: c, + } +} + +func (mp *maintenanceProxy) Defragment(ctx context.Context, dr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).Defragment(ctx, dr) +} + +func (mp *maintenanceProxy) Snapshot(sr *pb.SnapshotRequest, stream pb.Maintenance_SnapshotServer) error { + conn := mp.client.ActiveConnection() + ctx, cancel := context.WithCancel(stream.Context()) + defer cancel() + + ctx = withClientAuthToken(ctx, stream.Context()) + + sc, err := pb.NewMaintenanceClient(conn).Snapshot(ctx, sr) + if err != nil { + return err + } + + for { + rr, err := sc.Recv() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + err = stream.Send(rr) + if err != nil { + return err + } + } +} + +func (mp *maintenanceProxy) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).Hash(ctx, r) +} + +func (mp *maintenanceProxy) HashKV(ctx context.Context, r *pb.HashKVRequest) (*pb.HashKVResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).HashKV(ctx, r) +} + +func (mp *maintenanceProxy) Alarm(ctx context.Context, r 
*pb.AlarmRequest) (*pb.AlarmResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).Alarm(ctx, r) +} + +func (mp *maintenanceProxy) Status(ctx context.Context, r *pb.StatusRequest) (*pb.StatusResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).Status(ctx, r) +} + +func (mp *maintenanceProxy) MoveLeader(ctx context.Context, r *pb.MoveLeaderRequest) (*pb.MoveLeaderResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).MoveLeader(ctx, r) +} + +func (mp *maintenanceProxy) Downgrade(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) { + conn := mp.client.ActiveConnection() + return pb.NewMaintenanceClient(conn).Downgrade(ctx, r) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/metrics.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/metrics.go new file mode 100644 index 0000000000..01a7a94c89 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/metrics.go @@ -0,0 +1,121 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package grpcproxy + +import ( + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp" +) + +var ( + watchersCoalescing = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "grpc_proxy", + Name: "watchers_coalescing_total", + Help: "Total number of current watchers coalescing", + }) + eventsCoalescing = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "grpc_proxy", + Name: "events_coalescing_total", + Help: "Total number of events coalescing", + }) + cacheKeys = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "grpc_proxy", + Name: "cache_keys_total", + Help: "Total number of keys/ranges cached", + }) + cacheHits = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "grpc_proxy", + Name: "cache_hits_total", + Help: "Total number of cache hits", + }) + cachedMisses = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "etcd", + Subsystem: "grpc_proxy", + Name: "cache_misses_total", + Help: "Total number of cache misses", + }) +) + +func init() { + prometheus.MustRegister(watchersCoalescing) + prometheus.MustRegister(eventsCoalescing) + prometheus.MustRegister(cacheKeys) + prometheus.MustRegister(cacheHits) + prometheus.MustRegister(cachedMisses) +} + +// HandleMetrics performs a GET request against etcd endpoint and returns '/metrics'. 
+func HandleMetrics(mux *http.ServeMux, c *http.Client, eps []string) { + // random shuffle endpoints + r := rand.New(rand.NewSource(int64(time.Now().Nanosecond()))) + if len(eps) > 1 { + eps = shuffleEndpoints(r, eps) + } + + pathMetrics := etcdhttp.PathMetrics + mux.HandleFunc(pathMetrics, func(w http.ResponseWriter, r *http.Request) { + target := fmt.Sprintf("%s%s", eps[0], pathMetrics) + if !strings.HasPrefix(target, "http") { + scheme := "http" + if r.TLS != nil { + scheme = "https" + } + target = fmt.Sprintf("%s://%s", scheme, target) + } + + resp, err := c.Get(target) + if err != nil { + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + defer resp.Body.Close() + w.Header().Set("Content-Type", "text/plain; version=0.0.4") + body, _ := ioutil.ReadAll(resp.Body) + fmt.Fprintf(w, "%s", body) + }) +} + +// HandleProxyMetrics registers metrics handler on '/proxy/metrics'. +func HandleProxyMetrics(mux *http.ServeMux) { + mux.Handle(etcdhttp.PathProxyMetrics, promhttp.Handler()) +} + +func shuffleEndpoints(r *rand.Rand, eps []string) []string { + // copied from Go 1.9<= rand.Rand.Perm + n := len(eps) + p := make([]int, n) + for i := 0; i < n; i++ { + j := r.Intn(i + 1) + p[i] = p[j] + p[j] = i + } + neps := make([]string, n) + for i, k := range p { + neps[i] = eps[k] + } + return neps +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/register.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/register.go new file mode 100644 index 0000000000..14ec034ead --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/register.go @@ -0,0 +1,102 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "encoding/json" + "os" + + "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/client/v3/concurrency" + "go.etcd.io/etcd/client/v3/naming/endpoints" + + "go.uber.org/zap" + "golang.org/x/time/rate" +) + +// allow maximum 1 retry per second +const registerRetryRate = 1 + +// Register registers itself as a grpc-proxy server by writing prefixed-key +// with session of specified TTL (in seconds). The returned channel is closed +// when the client's context is canceled. +func Register(lg *zap.Logger, c *clientv3.Client, prefix string, addr string, ttl int) <-chan struct{} { + rm := rate.NewLimiter(rate.Limit(registerRetryRate), registerRetryRate) + + donec := make(chan struct{}) + go func() { + defer close(donec) + + for rm.Wait(c.Ctx()) == nil { + ss, err := registerSession(lg, c, prefix, addr, ttl) + if err != nil { + lg.Warn("failed to create a session", zap.Error(err)) + continue + } + select { + case <-c.Ctx().Done(): + ss.Close() + return + + case <-ss.Done(): + lg.Warn("session expired; possible network partition or server restart") + lg.Warn("creating a new session to rejoin") + continue + } + } + }() + + return donec +} + +func registerSession(lg *zap.Logger, c *clientv3.Client, prefix string, addr string, ttl int) (*concurrency.Session, error) { + ss, err := concurrency.NewSession(c, concurrency.WithTTL(ttl)) + if err != nil { + return nil, err + } + + em, err := endpoints.NewManager(c, prefix) + if err != nil { + return nil, err + } + endpoint := endpoints.Endpoint{Addr: addr, Metadata: getMeta()} + if err = 
em.AddEndpoint(c.Ctx(), prefix+"/"+addr, endpoint, clientv3.WithLease(ss.Lease())); err != nil { + return nil, err + } + + lg.Info( + "registered session with lease", + zap.String("addr", addr), + zap.Int("lease-ttl", ttl), + ) + return ss, nil +} + +// meta represents metadata of proxy register. +type meta struct { + Name string `json:"name"` +} + +func getMeta() string { + hostname, _ := os.Hostname() + bts, _ := json.Marshal(meta{Name: hostname}) + return string(bts) +} + +func decodeMeta(s string) (meta, error) { + m := meta{} + err := json.Unmarshal([]byte(s), &m) + return m, err +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/util.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/util.go new file mode 100644 index 0000000000..856ac5769e --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/util.go @@ -0,0 +1,75 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package grpcproxy + +import ( + "context" + + "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +func getAuthTokenFromClient(ctx context.Context) string { + md, ok := metadata.FromIncomingContext(ctx) + if ok { + ts, ok := md[rpctypes.TokenFieldNameGRPC] + if ok { + return ts[0] + } + } + return "" +} + +func withClientAuthToken(ctx, ctxWithToken context.Context) context.Context { + token := getAuthTokenFromClient(ctxWithToken) + if token != "" { + ctx = context.WithValue(ctx, rpctypes.TokenFieldNameGRPC, token) + } + return ctx +} + +type proxyTokenCredential struct { + token string +} + +func (cred *proxyTokenCredential) RequireTransportSecurity() bool { + return false +} + +func (cred *proxyTokenCredential) GetRequestMetadata(ctx context.Context, s ...string) (map[string]string, error) { + return map[string]string{ + rpctypes.TokenFieldNameGRPC: cred.token, + }, nil +} + +func AuthUnaryClientInterceptor(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + token := getAuthTokenFromClient(ctx) + if token != "" { + tokenCred := &proxyTokenCredential{token} + opts = append(opts, grpc.PerRPCCredentials(tokenCred)) + } + return invoker(ctx, method, req, reply, cc, opts...) +} + +func AuthStreamClientInterceptor(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { + tokenif := ctx.Value(rpctypes.TokenFieldNameGRPC) + if tokenif != nil { + tokenCred := &proxyTokenCredential{tokenif.(string)} + opts = append(opts, grpc.PerRPCCredentials(tokenCred)) + } + return streamer(ctx, desc, cc, method, opts...) 
// watchProxy is the shared state behind the proxy's watch service: the
// backend watcher, a leader tracker, the set of watched ranges, and the
// bookkeeping needed to wait for in-flight streams on shutdown.
type watchProxy struct {
	cw  clientv3.Watcher
	ctx context.Context

	leader *leader

	ranges *watchRanges

	// mu protects adding outstanding watch servers through wg.
	mu sync.Mutex

	// wg waits until all outstanding watch servers quit.
	wg sync.WaitGroup

	// kv is used for permission checking
	kv clientv3.KV
	lg *zap.Logger
}

// NewWatchProxy builds a watch proxy on top of c and returns it together
// with a channel that is closed once the proxy has fully shut down.
//
// The proxy runs on a cancelable child of ctx. A background goroutine waits
// for the leader tracker to stop, cancels the proxy context if the tracker
// reported a disconnect, waits for all outstanding streams (wg), stops the
// watch ranges, and finally closes the returned channel.
func NewWatchProxy(ctx context.Context, lg *zap.Logger, c *clientv3.Client) (pb.WatchServer, <-chan struct{}) {
	cctx, cancel := context.WithCancel(ctx)
	wp := &watchProxy{
		cw:     c.Watcher,
		ctx:    cctx,
		leader: newLeader(cctx, c.Watcher),

		kv: c.KV, // for permission checking
		lg: lg,
	}
	wp.ranges = newWatchRanges(wp)
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		<-wp.leader.stopNotify()
		// mu is held until the proxy context is done, so Watch (which takes
		// mu before wg.Add) cannot register a stream during shutdown.
		wp.mu.Lock()
		select {
		case <-wp.ctx.Done():
		case <-wp.leader.disconnectNotify():
			cancel()
		}
		<-wp.ctx.Done()
		wp.mu.Unlock()
		wp.wg.Wait()
		wp.ranges.stop()
	}()
	return wp, ch
}
+ stopc := make(chan struct{}, 3) + go func() { + defer func() { stopc <- struct{}{} }() + wps.recvLoop() + }() + go func() { + defer func() { stopc <- struct{}{} }() + wps.sendLoop() + }() + // tear down watch if leader goes down or entire watch proxy is terminated + go func() { + defer func() { stopc <- struct{}{} }() + select { + case <-lostLeaderC: + case <-ctx.Done(): + case <-wp.ctx.Done(): + } + }() + + <-stopc + cancel() + + // recv/send may only shutdown after function exits; + // goroutine notifies proxy that stream is through + go func() { + <-stopc + <-stopc + wps.close() + wp.wg.Done() + }() + + select { + case <-lostLeaderC: + return rpctypes.ErrNoLeader + case <-wp.leader.disconnectNotify(): + return status.Error(codes.Canceled, "the client connection is closing") + default: + return wps.ctx.Err() + } +} + +// watchProxyStream forwards etcd watch events to a proxied client stream. +type watchProxyStream struct { + ranges *watchRanges + + // mu protects watchers and nextWatcherID + mu sync.Mutex + // watchers receive events from watch broadcast. + watchers map[int64]*watcher + // nextWatcherID is the id to assign the next watcher on this stream. + nextWatcherID int64 + + stream pb.Watch_WatchServer + + // watchCh receives watch responses from the watchers. + watchCh chan *pb.WatchResponse + + ctx context.Context + cancel context.CancelFunc + + // kv is used for permission checking + kv clientv3.KV + lg *zap.Logger +} + +func (wps *watchProxyStream) close() { + var wg sync.WaitGroup + wps.cancel() + wps.mu.Lock() + wg.Add(len(wps.watchers)) + for _, wpsw := range wps.watchers { + go func(w *watcher) { + wps.ranges.delete(w) + wg.Done() + }(wpsw) + } + wps.watchers = nil + wps.mu.Unlock() + + wg.Wait() + + close(wps.watchCh) +} + +func (wps *watchProxyStream) checkPermissionForWatch(key, rangeEnd []byte) error { + if len(key) == 0 { + // If the length of the key is 0, we need to obtain full range. 
+ // look at clientv3.WithPrefix() + key = []byte{0} + rangeEnd = []byte{0} + } + req := &pb.RangeRequest{ + Serializable: true, + Key: key, + RangeEnd: rangeEnd, + CountOnly: true, + Limit: 1, + } + _, err := wps.kv.Do(wps.ctx, RangeRequestToOp(req)) + return err +} + +func (wps *watchProxyStream) recvLoop() error { + for { + req, err := wps.stream.Recv() + if err != nil { + return err + } + switch uv := req.RequestUnion.(type) { + case *pb.WatchRequest_CreateRequest: + cr := uv.CreateRequest + + if err := wps.checkPermissionForWatch(cr.Key, cr.RangeEnd); err != nil { + wps.watchCh <- &pb.WatchResponse{ + Header: &pb.ResponseHeader{}, + WatchId: -1, + Created: true, + Canceled: true, + CancelReason: err.Error(), + } + continue + } + + wps.mu.Lock() + w := &watcher{ + wr: watchRange{string(cr.Key), string(cr.RangeEnd)}, + id: wps.nextWatcherID, + wps: wps, + + nextrev: cr.StartRevision, + progress: cr.ProgressNotify, + prevKV: cr.PrevKv, + filters: v3rpc.FiltersFromRequest(cr), + } + if !w.wr.valid() { + w.post(&pb.WatchResponse{WatchId: -1, Created: true, Canceled: true}) + wps.mu.Unlock() + continue + } + wps.nextWatcherID++ + w.nextrev = cr.StartRevision + wps.watchers[w.id] = w + wps.ranges.add(w) + wps.mu.Unlock() + wps.lg.Debug("create watcher", zap.String("key", w.wr.key), zap.String("end", w.wr.end), zap.Int64("watcherId", wps.nextWatcherID)) + case *pb.WatchRequest_CancelRequest: + wps.delete(uv.CancelRequest.WatchId) + wps.lg.Debug("cancel watcher", zap.Int64("watcherId", uv.CancelRequest.WatchId)) + default: + // Panic or Fatalf would allow to network clients to crash the serve remotely. 
// watchBroadcast broadcasts a server watcher to many client watchers.
//
// Each watchBroadcast owns one upstream watch, opened in newWatchBroadcast,
// and forwards every response it receives to all registered receivers.
type watchBroadcast struct {
	// cancel cancels the context the upstream etcd watch was opened with.
	cancel context.CancelFunc
	// donec is closed when the goroutine draining the upstream watch exits.
	donec chan struct{}

	// mu protects nextrev, receivers and responses.
	mu sync.RWMutex
	// nextrev is the minimum expected next revision of the watcher on ch.
	nextrev int64
	// receivers contains all the client-side watchers to serve.
	receivers map[*watcher]struct{}
	// responses counts the number of responses
	responses int
	lg        *zap.Logger
}

// newWatchBroadcast creates a broadcast seeded with watcher w and starts a
// goroutine that opens the upstream watch for w's key range, starting at
// w.nextrev. Every upstream response is passed to bcast and then reported
// through update. The goroutine ends, and donec is closed, once the upstream
// watch channel is closed.
func newWatchBroadcast(lg *zap.Logger, wp *watchProxy, w *watcher, update func(*watchBroadcast)) *watchBroadcast {
	cctx, cancel := context.WithCancel(wp.ctx)
	wb := &watchBroadcast{
		cancel:    cancel,
		nextrev:   w.nextrev,
		receivers: make(map[*watcher]struct{}),
		donec:     make(chan struct{}),
		lg:        lg,
	}
	// responses is still zero here, so add registers w without emulating a
	// create event (see add).
	wb.add(w)
	go func() {
		defer close(wb.donec)

		opts := []clientv3.OpOption{
			clientv3.WithRange(w.wr.end),
			clientv3.WithProgressNotify(),
			clientv3.WithRev(wb.nextrev),
			clientv3.WithPrevKV(),
			clientv3.WithCreatedNotify(),
		}

		// Derive the upstream context from the first watcher's client stream
		// context via withClientAuthToken.
		cctx = withClientAuthToken(cctx, w.wps.stream.Context())

		wch := wp.cw.Watch(cctx, w.wr.key, opts...)
		wp.lg.Debug("watch", zap.String("key", w.wr.key))

		for wr := range wch {
			wb.bcast(wr)
			update(wb)
		}
	}()
	return wb
}

// bcast records one upstream response, advances nextrev and forwards the
// response to every receiver while holding mu.
func (wb *watchBroadcast) bcast(wr clientv3.WatchResponse) {
	wb.mu.Lock()
	defer wb.mu.Unlock()
	// watchers start on the given revision, if any; ignore header rev on create
	if wb.responses > 0 || wb.nextrev == 0 {
		wb.nextrev = wr.Header.Revision + 1
	}
	wb.responses++
	for r := range wb.receivers {
		r.send(wr)
	}
	if len(wb.receivers) > 0 {
		// One upstream response served len(receivers) watchers; count the
		// deliveries beyond the first.
		eventsCoalescing.Add(float64(len(wb.receivers) - 1))
	}
}

// add puts a watcher into receiving a broadcast if its revision at least
// meets the broadcast revision. Returns true if added.
func (wb *watchBroadcast) add(w *watcher) bool {
	wb.mu.Lock()
	defer wb.mu.Unlock()
	if wb.nextrev > w.nextrev || (wb.nextrev == 0 && w.nextrev != 0) {
		// wb is too far ahead, w will miss events
		// or wb is being established with a current watcher
		return false
	}
	if wb.responses == 0 {
		// Newly created; create event will be sent by etcd.
		wb.receivers[w] = struct{}{}
		return true
	}
	// already sent by etcd; emulate create event
	ok := w.post(&pb.WatchResponse{
		Header: &pb.ResponseHeader{
			// todo: fill in ClusterId
			// todo: fill in MemberId:
			Revision: w.nextrev,
			// todo: fill in RaftTerm:
		},
		WatchId: w.id,
		Created: true,
	})
	if !ok {
		// The emulated create event could not be posted; do not register w.
		return false
	}
	wb.receivers[w] = struct{}{}
	watchersCoalescing.Inc()

	return true
}

// delete unregisters w from the broadcast. It panics if w is not a receiver.
func (wb *watchBroadcast) delete(w *watcher) {
	wb.mu.Lock()
	defer wb.mu.Unlock()
	if _, ok := wb.receivers[w]; !ok {
		panic("deleting missing watcher from broadcast")
	}
	delete(wb.receivers, w)
	if len(wb.receivers) > 0 {
		// do not dec the only left watcher for coalescing.
		watchersCoalescing.Dec()
	}
}

// size returns the current number of receivers.
func (wb *watchBroadcast) size() int {
	wb.mu.RLock()
	defer wb.mu.RUnlock()
	return len(wb.receivers)
}

// empty reports whether the broadcast has no receivers.
func (wb *watchBroadcast) empty() bool { return wb.size() == 0 }

// stop cancels the upstream watch and waits up to one second for the
// draining goroutine to exit; on timeout it only logs an error.
func (wb *watchBroadcast) stop() {
	if !wb.empty() {
		// do not dec the only left watcher for coalescing.
		watchersCoalescing.Sub(float64(wb.size() - 1))
	}

	wb.cancel()

	select {
	case <-wb.donec:
		// watchProxyStream will hold watchRanges global mutex lock all the time if client failed to cancel etcd watchers.
		// and it will cause the watch proxy to not work.
		// please see pr https://github.com/etcd-io/etcd/pull/12030 to get more detail info.
	case <-time.After(time.Second):
		wb.lg.Error("failed to cancel etcd watcher")
	}
}
// watchBroadcasts is a set of watchBroadcast values together with an index
// from each watcher to the broadcast currently serving it.
type watchBroadcasts struct {
	wp *watchProxy

	// mu protects bcasts and watchers from the coalesce loop.
	mu       sync.Mutex
	bcasts   map[*watchBroadcast]struct{}
	watchers map[*watcher]*watchBroadcast

	// updatec carries broadcasts that received a response and may be
	// coalesced; it has capacity 1 and update drops signals when it is full.
	updatec chan *watchBroadcast
	// donec is closed when the coalesce goroutine exits.
	donec chan struct{}
}

// maxCoalesceReceivers prevents a popular watchBroadcast from being coalesced.
const maxCoalesceReceivers = 5

// newWatchBroadcasts creates an empty set and starts the goroutine that
// coalesces every broadcast received on updatec until updatec is closed.
func newWatchBroadcasts(wp *watchProxy) *watchBroadcasts {
	wbs := &watchBroadcasts{
		wp:       wp,
		bcasts:   make(map[*watchBroadcast]struct{}),
		watchers: make(map[*watcher]*watchBroadcast),
		updatec:  make(chan *watchBroadcast, 1),
		donec:    make(chan struct{}),
	}
	go func() {
		defer close(wbs.donec)
		for wb := range wbs.updatec {
			wbs.coalesce(wb)
		}
	}()
	return wbs
}

// coalesce tries to move all receivers of wb into another broadcast of the
// set. When a merge leaves wb empty, wb is removed from the set and stopped.
// Broadcasts that already have maxCoalesceReceivers or more receivers are
// left alone.
func (wbs *watchBroadcasts) coalesce(wb *watchBroadcast) {
	if wb.size() >= maxCoalesceReceivers {
		return
	}
	wbs.mu.Lock()
	for wbswb := range wbs.bcasts {
		if wbswb == wb {
			continue
		}
		// Lock order: the broadcast being merged first, then the candidate.
		wb.mu.Lock()
		wbswb.mu.Lock()
		// 1. check if wbswb is behind wb so it won't skip any events in wb
		// 2. ensure wbswb started; nextrev == 0 may mean wbswb is waiting
		// for a current watcher and expects a create event from the server.
		if wb.nextrev >= wbswb.nextrev && wbswb.responses > 0 {
			for w := range wb.receivers {
				wbswb.receivers[w] = struct{}{}
				wbs.watchers[w] = wbswb
			}
			wb.receivers = nil
		}
		wbswb.mu.Unlock()
		wb.mu.Unlock()
		if wb.empty() {
			delete(wbs.bcasts, wb)
			wb.stop()
			break
		}
	}
	wbs.mu.Unlock()
}

// add attaches w to the first existing broadcast that accepts it, or creates
// a new broadcast for w when none does.
func (wbs *watchBroadcasts) add(w *watcher) {
	wbs.mu.Lock()
	defer wbs.mu.Unlock()
	// find fitting bcast
	for wb := range wbs.bcasts {
		if wb.add(w) {
			wbs.watchers[w] = wb
			return
		}
	}
	// no fit; create a bcast
	wb := newWatchBroadcast(wbs.wp.lg, wbs.wp, w, wbs.update)
	wbs.watchers[w] = wb
	wbs.bcasts[wb] = struct{}{}
}

// delete removes a watcher and returns the number of remaining broadcasts.
// It panics if w is not tracked by this set.
func (wbs *watchBroadcasts) delete(w *watcher) int {
	wbs.mu.Lock()
	defer wbs.mu.Unlock()

	wb, ok := wbs.watchers[w]
	if !ok {
		panic("deleting missing watcher from broadcasts")
	}
	delete(wbs.watchers, w)
	wb.delete(w)
	if wb.empty() {
		delete(wbs.bcasts, wb)
		wb.stop()
	}
	return len(wbs.bcasts)
}

// stop stops every broadcast, closes updatec and waits for the coalesce
// goroutine to exit.
func (wbs *watchBroadcasts) stop() {
	wbs.mu.Lock()
	for wb := range wbs.bcasts {
		wb.stop()
	}
	wbs.bcasts = nil
	close(wbs.updatec)
	wbs.mu.Unlock()
	<-wbs.donec
}

// update asks the coalesce goroutine to look at wb. The send is
// non-blocking: if a request is already queued, this one is dropped.
func (wbs *watchBroadcasts) update(wb *watchBroadcast) {
	select {
	case wbs.updatec <- wb:
	default:
	}
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package grpcproxy + +import ( + "sync" +) + +// watchRanges tracks all open watches for the proxy. +type watchRanges struct { + wp *watchProxy + + mu sync.Mutex + bcasts map[watchRange]*watchBroadcasts +} + +func newWatchRanges(wp *watchProxy) *watchRanges { + return &watchRanges{ + wp: wp, + bcasts: make(map[watchRange]*watchBroadcasts), + } +} + +func (wrs *watchRanges) add(w *watcher) { + wrs.mu.Lock() + defer wrs.mu.Unlock() + + if wbs := wrs.bcasts[w.wr]; wbs != nil { + wbs.add(w) + return + } + wbs := newWatchBroadcasts(wrs.wp) + wrs.bcasts[w.wr] = wbs + wbs.add(w) +} + +func (wrs *watchRanges) delete(w *watcher) { + wrs.mu.Lock() + defer wrs.mu.Unlock() + wbs, ok := wrs.bcasts[w.wr] + if !ok { + panic("deleting missing range") + } + if wbs.delete(w) == 0 { + wbs.stop() + delete(wrs.bcasts, w.wr) + } +} + +func (wrs *watchRanges) stop() { + wrs.mu.Lock() + defer wrs.mu.Unlock() + for _, wb := range wrs.bcasts { + wb.stop() + } + wrs.bcasts = nil +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watcher.go b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watcher.go new file mode 100644 index 0000000000..5f6c3db808 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/grpcproxy/watcher.go @@ -0,0 +1,130 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// watchRange is the key interval a watch observes; an empty end means a
// single key.
type watchRange struct {
	key, end string
}

// valid reports whether the range is well formed: end is empty, end sorts
// after key, or end is the single byte 0.
func (wr *watchRange) valid() bool {
	return len(wr.end) == 0 || wr.end > wr.key || (wr.end[0] == 0 && len(wr.end) == 1)
}

// watcher is one client-side watch served by the proxy.
type watcher struct {
	// user configuration

	wr       watchRange
	filters  []mvcc.FilterFunc
	progress bool
	prevKV   bool

	// id is the id returned to the client on its watch stream.
	id int64
	// nextrev is the minimum expected next event revision.
	nextrev int64
	// lastHeader has the last header sent over the stream.
	lastHeader pb.ResponseHeader

	// wps is the parent.
	wps *watchProxyStream
}

// send filters out repeated events by discarding revisions older
// than the last one sent over the watch channel.
//
// It also drops progress notifications the watcher did not ask for, applies
// the watcher's filters, strips PrevKv unless prevKV is set, and posts the
// resulting response to the parent stream.
func (w *watcher) send(wr clientv3.WatchResponse) {
	if wr.IsProgressNotify() && !w.progress {
		return
	}
	// A response carrying events whose header revision is already behind
	// nextrev has nothing new for this watcher.
	if w.nextrev > wr.Header.Revision && len(wr.Events) > 0 {
		return
	}
	if w.nextrev == 0 {
		// current watch; expect updates following this revision
		w.nextrev = wr.Header.Revision + 1
	}

	events := make([]*mvccpb.Event, 0, len(wr.Events))

	var lastRev int64
	for i := range wr.Events {
		ev := (*mvccpb.Event)(wr.Events[i])
		if ev.Kv.ModRevision < w.nextrev {
			continue
		} else {
			// We cannot update w.rev here.
			// txn can have multiple events with the same rev.
			// If w.nextrev updates here, it would skip events in the same txn.
			lastRev = ev.Kv.ModRevision
		}

		filtered := false
		for _, filter := range w.filters {
			if filter(*ev) {
				filtered = true
				break
			}
		}
		if filtered {
			continue
		}

		if !w.prevKV {
			// Copy before clearing PrevKv so the event in wr.Events is left
			// unmodified.
			evCopy := *ev
			evCopy.PrevKv = nil
			ev = &evCopy
		}
		events = append(events, ev)
	}

	// Advance nextrev only after the whole batch has been examined.
	if lastRev >= w.nextrev {
		w.nextrev = lastRev + 1
	}

	// all events are filtered out?
	if !wr.IsProgressNotify() && !wr.Created && len(events) == 0 && wr.CompactRevision == 0 {
		return
	}

	w.lastHeader = wr.Header
	w.post(&pb.WatchResponse{
		Header:          &wr.Header,
		Created:         wr.Created,
		CompactRevision: wr.CompactRevision,
		Canceled:        wr.Canceled,
		WatchId:         w.id,
		Events:          events,
	})
}

// post puts a watch response on the watcher's proxy stream channel.
// If the channel does not accept the response within 50ms, post cancels the
// parent stream, logs an error and returns false.
func (w *watcher) post(wr *pb.WatchResponse) bool {
	select {
	case w.wps.watchCh <- wr:
	case <-time.After(50 * time.Millisecond):
		w.wps.cancel()
		w.wps.lg.Error("failed to put a watch response on the watcher's proxy stream channel,err is timeout")
		return false
	}
	return true
}
+ +package httpproxy + +import ( + "math/rand" + "net/url" + "sync" + "time" + + "go.uber.org/zap" +) + +// defaultRefreshInterval is the default proxyRefreshIntervalMs value +// as in etcdmain/config.go. +const defaultRefreshInterval = 30000 * time.Millisecond + +var once sync.Once + +func init() { + rand.Seed(time.Now().UnixNano()) +} + +func newDirector(lg *zap.Logger, urlsFunc GetProxyURLs, failureWait time.Duration, refreshInterval time.Duration) *director { + if lg == nil { + lg = zap.NewNop() + } + d := &director{ + lg: lg, + uf: urlsFunc, + failureWait: failureWait, + } + d.refresh() + go func() { + // In order to prevent missing proxy endpoints in the first try: + // when given refresh interval of defaultRefreshInterval or greater + // and whenever there is no available proxy endpoints, + // give 1-second refreshInterval. + for { + es := d.endpoints() + ri := refreshInterval + if ri >= defaultRefreshInterval { + if len(es) == 0 { + ri = time.Second + } + } + if len(es) > 0 { + once.Do(func() { + var sl []string + for _, e := range es { + sl = append(sl, e.URL.String()) + } + lg.Info("endpoints found", zap.Strings("endpoints", sl)) + }) + } + time.Sleep(ri) + d.refresh() + } + }() + return d +} + +type director struct { + sync.Mutex + lg *zap.Logger + ep []*endpoint + uf GetProxyURLs + failureWait time.Duration +} + +func (d *director) refresh() { + urls := d.uf() + d.Lock() + defer d.Unlock() + var endpoints []*endpoint + for _, u := range urls { + uu, err := url.Parse(u) + if err != nil { + d.lg.Info("upstream URL invalid", zap.Error(err)) + continue + } + endpoints = append(endpoints, newEndpoint(d.lg, *uu, d.failureWait)) + } + + // shuffle array to avoid connections being "stuck" to a single endpoint + for i := range endpoints { + j := rand.Intn(i + 1) + endpoints[i], endpoints[j] = endpoints[j], endpoints[i] + } + + d.ep = endpoints +} + +func (d *director) endpoints() []*endpoint { + d.Lock() + defer d.Unlock() + filtered := make([]*endpoint, 0) + 
for _, ep := range d.ep { + if ep.Available { + filtered = append(filtered, ep) + } + } + + return filtered +} + +func newEndpoint(lg *zap.Logger, u url.URL, failureWait time.Duration) *endpoint { + ep := endpoint{ + lg: lg, + URL: u, + Available: true, + failFunc: timedUnavailabilityFunc(failureWait), + } + + return &ep +} + +type endpoint struct { + sync.Mutex + + lg *zap.Logger + URL url.URL + Available bool + + failFunc func(ep *endpoint) +} + +func (ep *endpoint) Failed() { + ep.Lock() + if !ep.Available { + ep.Unlock() + return + } + + ep.Available = false + ep.Unlock() + + if ep.lg != nil { + ep.lg.Info("marked endpoint unavailable", zap.String("endpoint", ep.URL.String())) + } + + if ep.failFunc == nil { + if ep.lg != nil { + ep.lg.Info( + "no failFunc defined, endpoint will be unavailable forever", + zap.String("endpoint", ep.URL.String()), + ) + } + return + } + + ep.failFunc(ep) +} + +func timedUnavailabilityFunc(wait time.Duration) func(*endpoint) { + return func(ep *endpoint) { + time.AfterFunc(wait, func() { + ep.Available = true + if ep.lg != nil { + ep.lg.Info( + "marked endpoint available, to retest connectivity", + zap.String("endpoint", ep.URL.String()), + ) + } + }) + } +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/doc.go b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/doc.go new file mode 100644 index 0000000000..7a45099120 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/doc.go @@ -0,0 +1,18 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package httpproxy implements etcd httpproxy. The etcd proxy acts as a reverse +// http proxy forwarding client requests to active etcd cluster members, and does +// not participate in consensus. +package httpproxy diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/metrics.go b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/metrics.go new file mode 100644 index 0000000000..fcbedc28a8 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/metrics.go @@ -0,0 +1,90 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package httpproxy + +import ( + "net/http" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + requestsIncoming = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "proxy", + Name: "requests_total", + Help: "Counter requests incoming by method.", + }, []string{"method"}) + + requestsHandled = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "proxy", + Name: "handled_total", + Help: "Counter of requests fully handled (by authoratitave servers)", + }, []string{"method", "code"}) + + requestsDropped = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "etcd", + Subsystem: "proxy", + Name: "dropped_total", + Help: "Counter of requests dropped on the proxy.", + }, []string{"method", "proxying_error"}) + + requestsHandlingSec = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "etcd", + Subsystem: "proxy", + Name: "handling_duration_seconds", + Help: "Bucketed histogram of handling time of successful events (non-watches), by method (GET/PUT etc.).", + + // lowest bucket start of upper bound 0.0005 sec (0.5 ms) with factor 2 + // highest bucket start of 0.0005 sec * 2^12 == 2.048 sec + Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13), + }, []string{"method"}) +) + +type forwardingError string + +const ( + zeroEndpoints forwardingError = "zero_endpoints" + failedSendingRequest forwardingError = "failed_sending_request" + failedGettingResponse forwardingError = "failed_getting_response" +) + +func init() { + prometheus.MustRegister(requestsIncoming) + prometheus.MustRegister(requestsHandled) + prometheus.MustRegister(requestsDropped) + prometheus.MustRegister(requestsHandlingSec) +} + +func reportIncomingRequest(request *http.Request) { + requestsIncoming.WithLabelValues(request.Method).Inc() +} + +func reportRequestHandled(request *http.Request, response *http.Response, startTime time.Time) { + method := request.Method + 
requestsHandled.WithLabelValues(method, strconv.Itoa(response.StatusCode)).Inc() + requestsHandlingSec.WithLabelValues(method).Observe(time.Since(startTime).Seconds()) +} + +func reportRequestDropped(request *http.Request, err forwardingError) { + requestsDropped.WithLabelValues(request.Method, string(err)).Inc() +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/proxy.go b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/proxy.go new file mode 100644 index 0000000000..c8f27bf01d --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/httpproxy/proxy.go @@ -0,0 +1,121 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package httpproxy + +import ( + "encoding/json" + "net/http" + "strings" + "time" + + "go.uber.org/zap" + "golang.org/x/net/http2" +) + +const ( + // DefaultMaxIdleConnsPerHost indicates the default maximum idle connection + // count maintained between proxy and each member. We set it to 128 to + // let proxy handle 128 concurrent requests in long term smoothly. + // If the number of concurrent requests is bigger than this value, + // proxy needs to create one new connection when handling each request in + // the delta, which is bad because the creation consumes resource and + // may eat up ephemeral ports. + DefaultMaxIdleConnsPerHost = 128 +) + +// GetProxyURLs is a function which should return the current set of URLs to +// which client requests should be proxied. 
This function will be queried +// periodically by the proxy Handler to refresh the set of available +// backends. +type GetProxyURLs func() []string + +// NewHandler creates a new HTTP handler, listening on the given transport, +// which will proxy requests to an etcd cluster. +// The handler will periodically update its view of the cluster. +func NewHandler(lg *zap.Logger, t *http.Transport, urlsFunc GetProxyURLs, failureWait time.Duration, refreshInterval time.Duration) http.Handler { + if lg == nil { + lg = zap.NewNop() + } + if t.TLSClientConfig != nil { + // Enable http2, see Issue 5033. + err := http2.ConfigureTransport(t) + if err != nil { + lg.Info("Error enabling Transport HTTP/2 support", zap.Error(err)) + } + } + + p := &reverseProxy{ + lg: lg, + director: newDirector(lg, urlsFunc, failureWait, refreshInterval), + transport: t, + } + + mux := http.NewServeMux() + mux.Handle("/", p) + mux.HandleFunc("/v2/config/local/proxy", p.configHandler) + + return mux +} + +// NewReadonlyHandler wraps the given HTTP handler to allow only GET requests +func NewReadonlyHandler(hdlr http.Handler) http.Handler { + readonly := readonlyHandlerFunc(hdlr) + return http.HandlerFunc(readonly) +} + +func readonlyHandlerFunc(next http.Handler) func(http.ResponseWriter, *http.Request) { + return func(w http.ResponseWriter, req *http.Request) { + if req.Method != "GET" { + w.WriteHeader(http.StatusNotImplemented) + return + } + + next.ServeHTTP(w, req) + } +} + +func (p *reverseProxy) configHandler(w http.ResponseWriter, r *http.Request) { + if !allowMethod(w, r.Method, "GET") { + return + } + + eps := p.director.endpoints() + epstr := make([]string, len(eps)) + for i, e := range eps { + epstr[i] = e.URL.String() + } + + proxyConfig := struct { + Endpoints []string `json:"endpoints"` + }{ + Endpoints: epstr, + } + + json.NewEncoder(w).Encode(proxyConfig) +} + +// allowMethod verifies that the given method is one of the allowed methods, +// and if not, it writes an error to w. 
// allowMethod reports whether method m appears in the allowed list ms.
// When it does not, the response gets an "Allow" header listing ms joined
// by commas and a 405 Method Not Allowed error body.
func allowMethod(w http.ResponseWriter, m string, ms ...string) bool {
	allowed := false
	for i := 0; i < len(ms) && !allowed; i++ {
		allowed = ms[i] == m
	}
	if !allowed {
		w.Header().Set("Allow", strings.Join(ms, ","))
		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
	}
	return allowed
}
+ // http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html + // This list of headers borrowed from stdlib httputil.ReverseProxy + singleHopHeaders = []string{ + "Connection", + "Keep-Alive", + "Proxy-Authenticate", + "Proxy-Authorization", + "Te", // canonicalized version of "TE" + "Trailers", + "Transfer-Encoding", + "Upgrade", + } +) + +func removeSingleHopHeaders(hdrs *http.Header) { + for _, h := range singleHopHeaders { + hdrs.Del(h) + } +} + +type reverseProxy struct { + lg *zap.Logger + director *director + transport http.RoundTripper +} + +func (p *reverseProxy) ServeHTTP(rw http.ResponseWriter, clientreq *http.Request) { + reportIncomingRequest(clientreq) + proxyreq := new(http.Request) + *proxyreq = *clientreq + startTime := time.Now() + + var ( + proxybody []byte + err error + ) + + if clientreq.Body != nil { + proxybody, err = ioutil.ReadAll(clientreq.Body) + if err != nil { + msg := fmt.Sprintf("failed to read request body: %v", err) + p.lg.Info("failed to read request body", zap.Error(err)) + e := httptypes.NewHTTPError(http.StatusInternalServerError, "httpproxy: "+msg) + if we := e.WriteTo(rw); we != nil { + p.lg.Debug( + "error writing HTTPError to remote addr", + zap.String("remote-addr", clientreq.RemoteAddr), + zap.Error(we), + ) + } + return + } + } + + // deep-copy the headers, as these will be modified below + proxyreq.Header = make(http.Header) + copyHeader(proxyreq.Header, clientreq.Header) + + normalizeRequest(proxyreq) + removeSingleHopHeaders(&proxyreq.Header) + maybeSetForwardedFor(proxyreq) + + endpoints := p.director.endpoints() + if len(endpoints) == 0 { + msg := "zero endpoints currently available" + reportRequestDropped(clientreq, zeroEndpoints) + + // TODO: limit the rate of the error logging. 
+ p.lg.Info(msg) + e := httptypes.NewHTTPError(http.StatusServiceUnavailable, "httpproxy: "+msg) + if we := e.WriteTo(rw); we != nil { + p.lg.Debug( + "error writing HTTPError to remote addr", + zap.String("remote-addr", clientreq.RemoteAddr), + zap.Error(we), + ) + } + return + } + + var requestClosed int32 + completeCh := make(chan bool, 1) + closeNotifier, ok := rw.(http.CloseNotifier) + ctx, cancel := context.WithCancel(context.Background()) + proxyreq = proxyreq.WithContext(ctx) + defer cancel() + if ok { + closeCh := closeNotifier.CloseNotify() + go func() { + select { + case <-closeCh: + atomic.StoreInt32(&requestClosed, 1) + p.lg.Info( + "client closed request prematurely", + zap.String("remote-addr", clientreq.RemoteAddr), + ) + cancel() + case <-completeCh: + } + }() + + defer func() { + completeCh <- true + }() + } + + var res *http.Response + + for _, ep := range endpoints { + if proxybody != nil { + proxyreq.Body = ioutil.NopCloser(bytes.NewBuffer(proxybody)) + } + redirectRequest(proxyreq, ep.URL) + + res, err = p.transport.RoundTrip(proxyreq) + if atomic.LoadInt32(&requestClosed) == 1 { + return + } + if err != nil { + reportRequestDropped(clientreq, failedSendingRequest) + p.lg.Info( + "failed to direct request", + zap.String("url", ep.URL.String()), + zap.Error(err), + ) + ep.Failed() + continue + } + + break + } + + if res == nil { + // TODO: limit the rate of the error logging. 
+ msg := fmt.Sprintf("unable to get response from %d endpoint(s)", len(endpoints)) + reportRequestDropped(clientreq, failedGettingResponse) + p.lg.Info(msg) + e := httptypes.NewHTTPError(http.StatusBadGateway, "httpproxy: "+msg) + if we := e.WriteTo(rw); we != nil { + p.lg.Debug( + "error writing HTTPError to remote addr", + zap.String("remote-addr", clientreq.RemoteAddr), + zap.Error(we), + ) + } + return + } + + defer res.Body.Close() + reportRequestHandled(clientreq, res, startTime) + removeSingleHopHeaders(&res.Header) + copyHeader(rw.Header(), res.Header) + + rw.WriteHeader(res.StatusCode) + io.Copy(rw, res.Body) +} + +func copyHeader(dst, src http.Header) { + for k, vv := range src { + for _, v := range vv { + dst.Add(k, v) + } + } +} + +func redirectRequest(req *http.Request, loc url.URL) { + req.URL.Scheme = loc.Scheme + req.URL.Host = loc.Host +} + +func normalizeRequest(req *http.Request) { + req.Proto = "HTTP/1.1" + req.ProtoMajor = 1 + req.ProtoMinor = 1 + req.Close = false +} + +func maybeSetForwardedFor(req *http.Request) { + clientIP, _, err := net.SplitHostPort(req.RemoteAddr) + if err != nil { + return + } + + // If we aren't the first proxy retain prior + // X-Forwarded-For information as a comma+space + // separated list and fold multiple headers into one. + if prior, ok := req.Header["X-Forwarded-For"]; ok { + clientIP = strings.Join(prior, ", ") + ", " + clientIP + } + req.Header.Set("X-Forwarded-For", clientIP) +} diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/doc.go b/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/doc.go new file mode 100644 index 0000000000..6889cacb63 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/doc.go @@ -0,0 +1,16 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tcpproxy is an OSI level 4 proxy for routing etcd clients to etcd servers. +package tcpproxy diff --git a/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/userspace.go b/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/userspace.go new file mode 100644 index 0000000000..81421bffa7 --- /dev/null +++ b/vendor/go.etcd.io/etcd/server/v3/proxy/tcpproxy/userspace.go @@ -0,0 +1,231 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tcpproxy + +import ( + "fmt" + "io" + "math/rand" + "net" + "sync" + "time" + + "go.uber.org/zap" +) + +type remote struct { + mu sync.Mutex + srv *net.SRV + addr string + inactive bool +} + +func (r *remote) inactivate() { + r.mu.Lock() + defer r.mu.Unlock() + r.inactive = true +} + +func (r *remote) tryReactivate() error { + conn, err := net.Dial("tcp", r.addr) + if err != nil { + return err + } + conn.Close() + r.mu.Lock() + defer r.mu.Unlock() + r.inactive = false + return nil +} + +func (r *remote) isActive() bool { + r.mu.Lock() + defer r.mu.Unlock() + return !r.inactive +} + +type TCPProxy struct { + Logger *zap.Logger + Listener net.Listener + Endpoints []*net.SRV + MonitorInterval time.Duration + + donec chan struct{} + + mu sync.Mutex // guards the following fields + remotes []*remote + pickCount int // for round robin +} + +func (tp *TCPProxy) Run() error { + tp.donec = make(chan struct{}) + if tp.MonitorInterval == 0 { + tp.MonitorInterval = 5 * time.Minute + } + for _, srv := range tp.Endpoints { + addr := fmt.Sprintf("%s:%d", srv.Target, srv.Port) + tp.remotes = append(tp.remotes, &remote{srv: srv, addr: addr}) + } + + eps := []string{} + for _, ep := range tp.Endpoints { + eps = append(eps, fmt.Sprintf("%s:%d", ep.Target, ep.Port)) + } + if tp.Logger != nil { + tp.Logger.Info("ready to proxy client requests", zap.Strings("endpoints", eps)) + } + + go tp.runMonitor() + for { + in, err := tp.Listener.Accept() + if err != nil { + return err + } + + go tp.serve(in) + } +} + +func (tp *TCPProxy) pick() *remote { + var weighted []*remote + var unweighted []*remote + + bestPr := uint16(65535) + w := 0 + // find best priority class + for _, r := range tp.remotes { + switch { + case !r.isActive(): + case r.srv.Priority < bestPr: + bestPr = r.srv.Priority + w = 0 + weighted = nil + unweighted = nil + fallthrough + case r.srv.Priority == bestPr: + if r.srv.Weight > 0 { + weighted = append(weighted, r) + w += int(r.srv.Weight) + } else { + unweighted 
= append(unweighted, r) + } + } + } + if weighted != nil { + if len(unweighted) > 0 && rand.Intn(100) == 1 { + // In the presence of records containing weights greater + // than 0, records with weight 0 should have a very small + // chance of being selected. + r := unweighted[tp.pickCount%len(unweighted)] + tp.pickCount++ + return r + } + // choose a uniform random number between 0 and the sum computed + // (inclusive), and select the RR whose running sum value is the + // first in the selected order + choose := rand.Intn(w) + for i := 0; i < len(weighted); i++ { + choose -= int(weighted[i].srv.Weight) + if choose <= 0 { + return weighted[i] + } + } + } + if unweighted != nil { + for i := 0; i < len(tp.remotes); i++ { + picked := tp.remotes[tp.pickCount%len(tp.remotes)] + tp.pickCount++ + if picked.isActive() { + return picked + } + } + } + return nil +} + +func (tp *TCPProxy) serve(in net.Conn) { + var ( + err error + out net.Conn + ) + + for { + tp.mu.Lock() + remote := tp.pick() + tp.mu.Unlock() + if remote == nil { + break + } + // TODO: add timeout + out, err = net.Dial("tcp", remote.addr) + if err == nil { + break + } + remote.inactivate() + if tp.Logger != nil { + tp.Logger.Warn("deactivated endpoint", zap.String("address", remote.addr), zap.Duration("interval", tp.MonitorInterval), zap.Error(err)) + } + } + + if out == nil { + in.Close() + return + } + + go func() { + io.Copy(in, out) + in.Close() + out.Close() + }() + + io.Copy(out, in) + out.Close() + in.Close() +} + +func (tp *TCPProxy) runMonitor() { + for { + select { + case <-time.After(tp.MonitorInterval): + tp.mu.Lock() + for _, rem := range tp.remotes { + if rem.isActive() { + continue + } + go func(r *remote) { + if err := r.tryReactivate(); err != nil { + if tp.Logger != nil { + tp.Logger.Warn("failed to activate endpoint (stay inactive for another interval)", zap.String("address", r.addr), zap.Duration("interval", tp.MonitorInterval), zap.Error(err)) + } + } else { + if tp.Logger != nil { + 
tp.Logger.Info("activated", zap.String("address", r.addr)) + } + } + }(rem) + } + tp.mu.Unlock() + case <-tp.donec: + return + } + } +} + +func (tp *TCPProxy) Stop() { + // graceful shutdown? + // shutdown current connections? + tp.Listener.Close() + close(tp.donec) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 099940e366..01359bcb0d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1161,6 +1161,11 @@ go.etcd.io/etcd/client/v3/concurrency go.etcd.io/etcd/client/v3/credentials go.etcd.io/etcd/client/v3/internal/endpoint go.etcd.io/etcd/client/v3/internal/resolver +go.etcd.io/etcd/client/v3/leasing +go.etcd.io/etcd/client/v3/namespace +go.etcd.io/etcd/client/v3/naming/endpoints +go.etcd.io/etcd/client/v3/naming/endpoints/internal +go.etcd.io/etcd/client/v3/ordering # go.etcd.io/etcd/pkg/v3 v3.5.3 => github.com/openshift/etcd/pkg/v3 v3.5.1-0.20220707134052-31b6b2d9b4d7 ## explicit; go 1.16 go.etcd.io/etcd/pkg/v3/adt @@ -1173,6 +1178,7 @@ go.etcd.io/etcd/pkg/v3/httputil go.etcd.io/etcd/pkg/v3/idutil go.etcd.io/etcd/pkg/v3/ioutil go.etcd.io/etcd/pkg/v3/netutil +go.etcd.io/etcd/pkg/v3/osutil go.etcd.io/etcd/pkg/v3/pbutil go.etcd.io/etcd/pkg/v3/runtime go.etcd.io/etcd/pkg/v3/schedule @@ -1191,6 +1197,7 @@ go.etcd.io/etcd/server/v3/auth go.etcd.io/etcd/server/v3/config go.etcd.io/etcd/server/v3/datadir go.etcd.io/etcd/server/v3/embed +go.etcd.io/etcd/server/v3/etcdmain go.etcd.io/etcd/server/v3/etcdserver go.etcd.io/etcd/server/v3/etcdserver/api go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp @@ -1223,7 +1230,11 @@ go.etcd.io/etcd/server/v3/lease/leasepb go.etcd.io/etcd/server/v3/mvcc go.etcd.io/etcd/server/v3/mvcc/backend go.etcd.io/etcd/server/v3/mvcc/buckets +go.etcd.io/etcd/server/v3/proxy/grpcproxy go.etcd.io/etcd/server/v3/proxy/grpcproxy/adapter +go.etcd.io/etcd/server/v3/proxy/grpcproxy/cache +go.etcd.io/etcd/server/v3/proxy/httpproxy +go.etcd.io/etcd/server/v3/proxy/tcpproxy go.etcd.io/etcd/server/v3/verify 
go.etcd.io/etcd/server/v3/wal go.etcd.io/etcd/server/v3/wal/walpb