From df6600248773b327cee184907914d4c6338e8a3d Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Tue, 10 Oct 2017 11:36:16 -0700 Subject: [PATCH 1/2] Set socket timeout on netlink sockets In case the file descriptor of the netlink socket is closed the recvfrom is not returning. This may create deadlock conditions. The current solution is to make sure that all the netlink socket used have a proper timeout set on them to have the possibility to return Added test to emulate the watchMiss condition Signed-off-by: Flavio Crisciani --- drivers/overlay/ov_network.go | 11 ++++++++++ drivers/overlay/overlay_test.go | 36 +++++++++++++++++++++++++++++++++ ipvs/ipvs.go | 16 +++++++++++++++ ipvs/netlink.go | 11 ++++++---- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 3fbfccf007..11314170b1 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -696,6 +696,12 @@ func (n *network) initSandbox(restore bool) error { var nlSock *nl.NetlinkSocket sbox.InvokeFunc(func() { nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) + if err != nil { + return + } + // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed + tv := syscall.NsecToTimeval(soTimeout.Nanoseconds()) + err = nlSock.SetReceiveTimeout(&tv) }) n.setNetlinkSocket(nlSock) @@ -721,6 +727,11 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { // The netlink socket got closed, simply exit to not leak this goroutine return } + // When the receive timeout expires the receive will return EAGAIN + if err == syscall.EAGAIN { + // we continue here to avoid spam for timeouts + continue + } logrus.Errorf("Failed to receive from netlink: %v ", err) continue } diff --git a/drivers/overlay/overlay_test.go b/drivers/overlay/overlay_test.go index 6d2127311d..75c89da6bb 100644 --- a/drivers/overlay/overlay_test.go +++ b/drivers/overlay/overlay_test.go @@ -1,7 +1,9 @@ package overlay import ( + "context" "net" + "syscall" "testing" "time" @@ -12,6 +14,7 @@ import ( "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/netlabel" _ "github.com/docker/libnetwork/testutils" + "github.com/vishvananda/netlink/nl" ) func init() { @@ -135,3 +138,36 @@ func TestOverlayType(t *testing.T) { dt.d.Type()) } } + +// Test that the netlink socket close unblock the watchMiss to avoid deadlock +func TestNetlinkSocket(t *testing.T) { + // This is the same code used by the overlay driver to create the netlink interface + // for the watch miss + nlSock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) + if err != nil { + t.Fatal() + } + // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed + tv := syscall.NsecToTimeval(soTimeout.Nanoseconds()) + err = nlSock.SetReceiveTimeout(&tv) + if err != nil { + t.Fatal() + } + n := &network{id: "testnetid"} + ch := make(chan error) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + go func() { + n.watchMiss(nlSock) + ch <- nil + }() + time.Sleep(5 * time.Second) + nlSock.Close() + select { + case <-ch: + case <-ctx.Done(): + { + t.Fatalf("Timeout expired") + } + } +} diff --git a/ipvs/ipvs.go b/ipvs/ipvs.go index ebcdd808c3..effbb716eb 100644 --- a/ipvs/ipvs.go +++ b/ipvs/ipvs.go @@ -5,12 +5,19 @@ package ipvs import ( "net" "syscall" + "time" "fmt" + "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" ) +const ( + netlinkRecvSocketsTimeout = 3 * time.Second + netlinkSendSocketTimeout = 30 * time.Second +) + // Service defines an IPVS service in its entirety. type Service struct { // Virtual service address. @@ -82,6 +89,15 @@ func New(path string) (*Handle, error) { if err != nil { return nil, err } + // Add operation timeout to avoid deadlocks + tv := syscall.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds()) + if err := sock.SetSendTimeout(&tv); err != nil { + return nil, err + } + tv = syscall.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds()) + if err := sock.SetReceiveTimeout(&tv); err != nil { + return nil, err + } return &Handle{sock: sock}, nil } diff --git a/ipvs/netlink.go b/ipvs/netlink.go index 2089283d14..c062a1789d 100644 --- a/ipvs/netlink.go +++ b/ipvs/netlink.go @@ -203,10 +203,6 @@ func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest { } func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) { - var ( - err error - ) - if err := s.Send(req); err != nil { return nil, err } @@ -222,6 +218,13 @@ done: for { msgs, err := s.Receive() if err != nil { + if s.GetFd() == -1 { + return nil, fmt.Errorf("Socket got closed on receive") + } + if err == syscall.EAGAIN { + // timeout fired + continue + } return nil, err } for _, m := range msgs { From 45ea903b238f9e0fe58fc636a96b8cd8311e7acd Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Tue, 10 Oct 2017 11:38:04 -0700 Subject: [PATCH 2/2] Vendor latest netlink library - needed the methods to set the proper timeout Signed-off-by: Flavio Crisciani --- vendor.conf | 2 +- .../github.com/vishvananda/netlink/README.md | 10 +- .../vishvananda/netlink/addr_linux.go | 52 +++- .../vishvananda/netlink/bridge_linux.go | 115 +++++++ .../vishvananda/netlink/conntrack_linux.go | 45 ++- .../github.com/vishvananda/netlink/filter.go | 10 - .../vishvananda/netlink/filter_linux.go | 66 ++-- .../vishvananda/netlink/handle_linux.go | 41 ++- .../vishvananda/netlink/handle_unspecified.go | 4 + vendor/github.com/vishvananda/netlink/link.go | 26 ++ .../vishvananda/netlink/link_linux.go | 286 +++++++++++++++--- .../github.com/vishvananda/netlink/neigh.go | 1 + .../vishvananda/netlink/neigh_linux.go | 29 +- .../netlink/netlink_unspecified.go | 4 + .../vishvananda/netlink/nl/bridge_linux.go | 74 +++++ .../vishvananda/netlink/nl/conntrack_linux.go | 4 +- .../vishvananda/netlink/nl/link_linux.go | 36 ++- .../vishvananda/netlink/nl/nl_linux.go | 14 + .../vishvananda/netlink/qdisc_linux.go | 72 ++--- .../github.com/vishvananda/netlink/route.go | 62 +++- .../vishvananda/netlink/route_linux.go | 101 ++++++- vendor/github.com/vishvananda/netlink/rule.go | 1 + .../vishvananda/netlink/rule_linux.go | 3 + 23 files changed, 898 insertions(+), 160 deletions(-) create mode 100644 vendor/github.com/vishvananda/netlink/bridge_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/bridge_linux.go diff --git a/vendor.conf b/vendor.conf index f00d8ced44..bdcb5a1a54 100644 --- a/vendor.conf +++ b/vendor.conf @@ -45,7 +45,7 @@ github.com/sirupsen/logrus v1.0.3 github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065 -github.com/vishvananda/netlink bd6d5de5ccef2d66b0a26177928d0d8895d7f969 +github.com/vishvananda/netlink b2de5d10e38ecce8607e6b438b6d174f389a004e github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 golang.org/x/crypto 558b6879de74bc843225cde5686419267ff707ca golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6 diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md index 2367fae097..0b61be217e 100644 --- a/vendor/github.com/vishvananda/netlink/README.md +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -38,15 +38,18 @@ Add a new bridge and add eth1 into it: package main import ( - "net" + "fmt" "github.com/vishvananda/netlink" ) func main() { la := netlink.NewLinkAttrs() la.Name = "foo" - mybridge := &netlink.Bridge{la}} - _ := netlink.LinkAdd(mybridge) + mybridge := &netlink.Bridge{LinkAttrs: la} + err := netlink.LinkAdd(mybridge) + if err != nil { + fmt.Printf("could not add %s: %v\n", la.Name, err) + } eth1, _ := netlink.LinkByName("eth1") netlink.LinkSetMaster(eth1, mybridge) } @@ -63,7 +66,6 @@ Add a new ip address to loopback: package main import ( - "net" "github.com/vishvananda/netlink" ) diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index f33242a7c2..8808b42d9b 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -2,7 +2,6 @@ package netlink import ( "fmt" - "log" "net" "strings" "syscall" @@ -65,7 +64,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error msg := nl.NewIfAddrmsg(family) msg.Index = uint32(base.Index) msg.Scope = uint8(addr.Scope) - prefixlen, _ := addr.Mask.Size() + prefixlen, masklen := addr.Mask.Size() msg.Prefixlen = uint8(prefixlen) req.AddData(msg) @@ -103,9 +102,14 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error } } - if addr.Broadcast != nil { - req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast)) + if addr.Broadcast == nil { + calcBroadcast := make(net.IP, masklen/8) + for i := range localAddrData { + calcBroadcast[i] = localAddrData[i] | ^addr.Mask[i] + } + addr.Broadcast = calcBroadcast } + req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast)) if addr.Label != "" { labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label)) @@ -232,16 +236,34 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribe(netns.None(), netns.None(), ch, done) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribe(ns, netns.None(), ch, done) + return addrSubscribeAt(ns, netns.None(), ch, done, nil) +} + +// AddrSubscribeOptions contains a set of options to use with +// AddrSubscribeWithOptions. +type AddrSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) } -func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { +// AddrSubscribeWithOptions work like AddrSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, options AddrSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback) +} + +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error)) error { s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR) if err != nil { return err @@ -257,20 +279,26 @@ func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-cha for { msgs, err := s.Receive() if err != nil { - log.Printf("netlink.AddrSubscribe: Receive() error: %v", err) + if cberr != nil { + cberr(err) + } return } for _, m := range msgs { msgType := m.Header.Type if msgType != syscall.RTM_NEWADDR && msgType != syscall.RTM_DELADDR { - log.Printf("netlink.AddrSubscribe: bad message type: %d", msgType) - continue + if cberr != nil { + cberr(fmt.Errorf("bad message type: %d", msgType)) + } + return } addr, _, ifindex, err := parseAddr(m.Data) if err != nil { - log.Printf("netlink.AddrSubscribe: could not parse address: %v", err) - continue + if cberr != nil { + cberr(fmt.Errorf("could not parse address: %v", err)) + } + return } ch <- AddrUpdate{LinkAddress: *addr.IPNet, diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go new file mode 100644 index 0000000000..a65d6a1319 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -0,0 +1,115 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// BridgeVlanList gets a map of device id to bridge vlan infos. +// Equivalent to: `bridge vlan show` +func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { + return pkgHandle.BridgeVlanList() +} + +// BridgeVlanList gets a map of device id to bridge vlan infos. +// Equivalent to: `bridge vlan show` +func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { + req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) + msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN)))) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK) + if err != nil { + return nil, err + } + ret := make(map[int32][]*nl.BridgeVlanInfo) + for _, m := range msgs { + msg := nl.DeserializeIfInfomsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.IFLA_AF_SPEC: + //nested attr + nestAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, fmt.Errorf("failed to parse nested attr %v", err) + } + for _, nestAttr := range nestAttrs { + switch nestAttr.Attr.Type { + case nl.IFLA_BRIDGE_VLAN_INFO: + vlanInfo := nl.DeserializeBridgeVlanInfo(nestAttr.Value) + ret[msg.Index] = append(ret[msg.Index], vlanInfo) + } + } + } + } + } + return ret, nil +} + +// BridgeVlanAdd adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanAdd(link, vid, pvid, untagged, self, master) +} + +// BridgeVlanAdd adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(syscall.RTM_SETLINK, link, vid, pvid, untagged, self, master) +} + +// BridgeVlanDel adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanDel(link, vid, pvid, untagged, self, master) +} + +// BridgeVlanDel adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(syscall.RTM_DELLINK, link, vid, pvid, untagged, self, master) +} + +func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(cmd, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + msg.Index = int32(base.Index) + req.AddData(msg) + + br := nl.NewRtAttr(nl.IFLA_AF_SPEC, nil) + var flags uint16 + if self { + flags |= nl.BRIDGE_FLAGS_SELF + } + if master { + flags |= nl.BRIDGE_FLAGS_MASTER + } + if flags > 0 { + nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags)) + } + vlanInfo := &nl.BridgeVlanInfo{Vid: vid} + if pvid { + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_PVID + } + if untagged { + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED + } + nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + req.AddData(br) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go index 20df903006..ecf0445659 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -22,7 +22,11 @@ const ( // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2 ConntrackExpectTable = 2 ) - +const ( + // For Parsing Mark + TCP_PROTO = 6 + UDP_PROTO = 17 +) const ( // backward compatibility with golang 1.6 which does not have io.SeekCurrent seekCurrent = 1 @@ -56,7 +60,7 @@ func ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation -func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { +func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { return pkgHandle.ConntrackDeleteFilter(table, family, filter) } @@ -88,7 +92,7 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation -func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { +func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { res, err := h.dumpConntrackTable(table, family) if err != nil { return 0, err @@ -142,15 +146,16 @@ type ConntrackFlow struct { FamilyType uint8 Forward ipTuple Reverse ipTuple + Mark uint32 } func (s *ConntrackFlow) String() string { // conntrack cmd output: - // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 - return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d", + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 mark=0 + return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d mark=%d", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, - s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort) + s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Mark) } // This method parse the ip tuple structure @@ -160,7 +165,7 @@ func (s *ConntrackFlow) String() string { // // // -func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) { +func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { for i := 0; i < 2; i++ { _, t, _, v := parseNfAttrTLV(reader) switch t { @@ -189,6 +194,7 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) { // Skip some padding 2 byte reader.Seek(2, seekCurrent) } + return tpl.Protocol } func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) { @@ -216,6 +222,7 @@ func parseBERaw16(r *bytes.Reader, v *uint16) { func parseRawData(data []byte) *ConntrackFlow { s := &ConntrackFlow{} + var proto uint8 // First there is the Nfgenmsg header // consume only the family field reader := bytes.NewReader(data) @@ -234,7 +241,7 @@ func parseRawData(data []byte) *ConntrackFlow { nested, t, l := parseNfAttrTL(reader) if nested && t == nl.CTA_TUPLE_ORIG { if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { - parseIpTuple(reader, &s.Forward) + proto = parseIpTuple(reader, &s.Forward) } } else if nested && t == nl.CTA_TUPLE_REPLY { if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { @@ -248,7 +255,19 @@ func parseRawData(data []byte) *ConntrackFlow { } } } - + if proto == TCP_PROTO { + reader.Seek(64, seekCurrent) + _, t, _, v := parseNfAttrTLV(reader) + if t == nl.CTA_MARK { + s.Mark = uint32(v[3]) + } + } else if proto == UDP_PROTO { + reader.Seek(16, seekCurrent) + _, t, _, v := parseNfAttrTLV(reader) + if t == nl.CTA_MARK { + s.Mark = uint32(v[3]) + } + } return s } @@ -290,6 +309,12 @@ const ( ConntrackNatAnyIP // -any-nat ip Source or destination NAT ip ) +type CustomConntrackFilter interface { + // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches + // the filter or false otherwise + MatchConntrackFlow(flow *ConntrackFlow) bool +} + type ConntrackFilter struct { ipFilter map[ConntrackFilterType]net.IP } @@ -342,3 +367,5 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { return match } + +var _ CustomConntrackFilter = (*ConntrackFilter)(nil) diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go index 938b28b0b0..1120c79d6a 100644 --- a/vendor/github.com/vishvananda/netlink/filter.go +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -2,8 +2,6 @@ package netlink import ( "fmt" - - "github.com/vishvananda/netlink/nl" ) type Filter interface { @@ -184,14 +182,6 @@ func NewMirredAction(redirIndex int) *MirredAction { } } -// Constants used in TcU32Sel.Flags. -const ( - TC_U32_TERMINAL = nl.TC_U32_TERMINAL - TC_U32_OFFSET = nl.TC_U32_OFFSET - TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET - TC_U32_EAT = nl.TC_U32_EAT -) - // Sel of the U32 filters that contains multiple TcU32Key. This is the copy // and the frontend representation of nl.TcU32Sel. It is serialized into canonical // nl.TcU32Sel with the appropriate endianness. diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index dc0f90af88..5025bd56c1 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -11,6 +11,14 @@ import ( "github.com/vishvananda/netlink/nl" ) +// Constants used in TcU32Sel.Flags. +const ( + TC_U32_TERMINAL = nl.TC_U32_TERMINAL + TC_U32_OFFSET = nl.TC_U32_OFFSET + TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET + TC_U32_EAT = nl.TC_U32_EAT +) + // Fw filter filters on firewall marks // NOTE: this is in filter_linux because it refers to nl.TcPolice which // is defined in nl/tc_linux.go @@ -128,9 +136,11 @@ func (h *Handle) FilterAdd(filter Filter) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if u32, ok := filter.(*U32); ok { + + switch filter := filter.(type) { + case *U32: // Convert TcU32Sel into nl.TcU32Sel as it is without copy. - sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel)) + sel := (*nl.TcU32Sel)(unsafe.Pointer(filter.Sel)) if sel == nil { // match all sel = &nl.TcU32Sel{ @@ -158,56 +168,56 @@ func (h *Handle) FilterAdd(filter Filter) error { } sel.Nkeys = uint8(len(sel.Keys)) nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) - if u32.ClassId != 0 { - nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId)) + if filter.ClassId != 0 { + nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId)) } actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) // backwards compatibility - if u32.RedirIndex != 0 { - u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...) + if filter.RedirIndex != 0 { + filter.Actions = append([]Action{NewMirredAction(filter.RedirIndex)}, filter.Actions...) } - if err := EncodeActions(actionsAttr, u32.Actions); err != nil { + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { return err } - } else if fw, ok := filter.(*Fw); ok { - if fw.Mask != 0 { + case *Fw: + if filter.Mask != 0 { b := make([]byte, 4) - native.PutUint32(b, fw.Mask) + native.PutUint32(b, filter.Mask) nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b) } - if fw.InDev != "" { - nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev)) + if filter.InDev != "" { + nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) } - if (fw.Police != nl.TcPolice{}) { + if (filter.Police != nl.TcPolice{}) { police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil) - nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize()) - if (fw.Police.Rate != nl.TcRateSpec{}) { - payload := SerializeRtab(fw.Rtab) + nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, filter.Police.Serialize()) + if (filter.Police.Rate != nl.TcRateSpec{}) { + payload := SerializeRtab(filter.Rtab) nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload) } - if (fw.Police.PeakRate != nl.TcRateSpec{}) { - payload := SerializeRtab(fw.Ptab) + if (filter.Police.PeakRate != nl.TcRateSpec{}) { + payload := SerializeRtab(filter.Ptab) nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload) } } - if fw.ClassId != 0 { + if filter.ClassId != 0 { b := make([]byte, 4) - native.PutUint32(b, fw.ClassId) + native.PutUint32(b, filter.ClassId) nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b) } - } else if bpf, ok := filter.(*BpfFilter); ok { + case *BpfFilter: var bpfFlags uint32 - if bpf.ClassId != 0 { - nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(bpf.ClassId)) + if filter.ClassId != 0 { + nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId)) } - if bpf.Fd >= 0 { - nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(bpf.Fd)))) + if filter.Fd >= 0 { + nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd)))) } - if bpf.Name != "" { - nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(bpf.Name)) + if filter.Name != "" { + nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name)) } - if bpf.DirectAction { + if filter.DirectAction { bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT } nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags)) diff --git a/vendor/github.com/vishvananda/netlink/handle_linux.go b/vendor/github.com/vishvananda/netlink/handle_linux.go index a04ceae6b6..d37b087c33 100644 --- a/vendor/github.com/vishvananda/netlink/handle_linux.go +++ b/vendor/github.com/vishvananda/netlink/handle_linux.go @@ -45,12 +45,27 @@ func (h *Handle) SetSocketTimeout(to time.Duration) error { } tv := syscall.NsecToTimeval(to.Nanoseconds()) for _, sh := range h.sockets { - fd := sh.Socket.GetFd() - err := syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, &tv) - if err != nil { + if err := sh.Socket.SetSendTimeout(&tv); err != nil { return err } - err = syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, &tv) + if err := sh.Socket.SetReceiveTimeout(&tv); err != nil { + return err + } + } + return nil +} + +// SetSocketReceiveBufferSize sets the receive buffer size for each +// socket in the netlink handle. The maximum value is capped by +// /proc/sys/net/core/rmem_max. +func (h *Handle) SetSocketReceiveBufferSize(size int, force bool) error { + opt := syscall.SO_RCVBUF + if force { + opt = syscall.SO_RCVBUFFORCE + } + for _, sh := range h.sockets { + fd := sh.Socket.GetFd() + err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, opt, size) if err != nil { return err } @@ -58,6 +73,24 @@ func (h *Handle) SetSocketTimeout(to time.Duration) error { return nil } +// GetSocketReceiveBufferSize gets the receiver buffer size for each +// socket in the netlink handle. The retrieved value should be the +// double to the one set for SetSocketReceiveBufferSize. +func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) { + results := make([]int, len(h.sockets)) + i := 0 + for _, sh := range h.sockets { + fd := sh.Socket.GetFd() + size, err := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF) + if err != nil { + return nil, err + } + results[i] = size + i++ + } + return results, nil +} + // NewHandle returns a netlink handle on the network namespace // specified by ns. If ns=netns.None(), current network namespace // will be assumed diff --git a/vendor/github.com/vishvananda/netlink/handle_unspecified.go b/vendor/github.com/vishvananda/netlink/handle_unspecified.go index 32cf022732..7da21a6a18 100644 --- a/vendor/github.com/vishvananda/netlink/handle_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/handle_unspecified.go @@ -145,6 +145,10 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error { return ErrNotImplemented } +func (h *Handle) LinkSetTxQLen(link Link, qlen int) error { + return ErrNotImplemented +} + func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { return ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go index 547e92ec12..5aa3a1790a 100644 --- a/vendor/github.com/vishvananda/netlink/link.go +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -37,6 +37,7 @@ type LinkAttrs struct { EncapType string Protinfo *Protinfo OperState LinkOperState + NetNsID int } // LinkOperState represents the values of the IFLA_OPERSTATE link @@ -171,6 +172,7 @@ type LinkXdp struct { Fd int Attached bool Flags uint32 + ProgId uint32 } // Device links cannot be created via netlink. These links @@ -339,6 +341,7 @@ type Vxlan struct { UDPCSum bool NoAge bool GBP bool + FlowBased bool Age int Limit int Port int @@ -684,6 +687,7 @@ type Gretap struct { EncapType uint16 EncapFlags uint16 Link uint32 + FlowBased bool } func (gretap *Gretap) Attrs() *LinkAttrs { @@ -729,6 +733,28 @@ func (iptun *Vti) Type() string { return "vti" } +type Gretun struct { + LinkAttrs + Link uint32 + IFlags uint16 + OFlags uint16 + IKey uint32 + OKey uint32 + Local net.IP + Remote net.IP + Ttl uint8 + Tos uint8 + PMtuDisc uint8 +} + +func (gretun *Gretun) Attrs() *LinkAttrs { + return &gretun.LinkAttrs +} + +func (gretun *Gretun) Type() string { + return "gre" +} + type Vrf struct { LinkAttrs Table uint32 diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index 1c1bc52c4d..e94fd9766c 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -379,6 +379,74 @@ func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error { return err } +// LinkSetVfSpoofchk enables/disables spoof check on a vf for the link. +// Equivalent to: `ip link set $link vf $vf spoofchk $check` +func LinkSetVfSpoofchk(link Link, vf int, check bool) error { + return pkgHandle.LinkSetVfSpoofchk(link, vf, check) +} + +// LinkSetVfSpookfchk enables/disables spoof check on a vf for the link. +// Equivalent to: `ip link set $link vf $vf spoofchk $check` +func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error { + var setting uint32 + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) + info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + if check { + setting = 1 + } + vfmsg := nl.VfSpoofchk{ + Vf: uint32(vf), + Setting: setting, + } + nl.NewRtAttrChild(info, nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfTrust enables/disables trust state on a vf for the link. +// Equivalent to: `ip link set $link vf $vf trust $state` +func LinkSetVfTrust(link Link, vf int, state bool) error { + return pkgHandle.LinkSetVfTrust(link, vf, state) +} + +// LinkSetVfTrust enables/disables trust state on a vf for the link. +// Equivalent to: `ip link set $link vf $vf trust $state` +func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error { + var setting uint32 + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) + info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + if state { + setting = 1 + } + vfmsg := nl.VfTrust{ + Vf: uint32(vf), + Setting: setting, + } + nl.NewRtAttrChild(info, nl.IFLA_VF_TRUST, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + // LinkSetMaster sets the master of the link device. // Equivalent to: `ip link set $link master $master` func LinkSetMaster(link Link, master *Bridge) error { @@ -500,6 +568,12 @@ func (h *Handle) LinkSetNsFd(link Link, fd int) error { // LinkSetXdpFd adds a bpf function to the driver. The fd must be a bpf // program loaded with bpf(type=BPF_PROG_TYPE_XDP) func LinkSetXdpFd(link Link, fd int) error { + return LinkSetXdpFdWithFlags(link, fd, 0) +} + +// LinkSetXdpFdWithFlags adds a bpf function to the driver with the given +// options. The fd must be a bpf program loaded with bpf(type=BPF_PROG_TYPE_XDP) +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { base := link.Attrs() ensureIndex(base) req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) @@ -508,7 +582,7 @@ func LinkSetXdpFd(link Link, fd int) error { msg.Index = int32(base.Index) req.AddData(msg) - addXdpAttrs(&LinkXdp{Fd: fd}, req) + addXdpAttrs(&LinkXdp{Fd: fd, Flags: uint32(flags)}, req) _, err := req.Execute(syscall.NETLINK_ROUTE, 0) return err @@ -528,7 +602,13 @@ type vxlanPortRange struct { func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + + if vxlan.FlowBased { + vxlan.VxlanId = 0 + } + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId))) + if vxlan.VtepDevIndex != 0 { nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex))) } @@ -569,6 +649,9 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { if vxlan.GBP { nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{}) } + if vxlan.FlowBased { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased)) + } if vxlan.NoAge { nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) } else if vxlan.Age > 0 { @@ -818,16 +901,17 @@ func (h *Handle) linkModify(link Link, flags int) error { linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil) nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) - if vlan, ok := link.(*Vlan); ok { + switch link := link.(type) { + case *Vlan: b := make([]byte, 2) - native.PutUint16(b, uint16(vlan.VlanId)) + native.PutUint16(b, uint16(link.VlanId)) data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) nl.NewRtAttrChild(data, nl.IFLA_VLAN_ID, b) - } else if veth, ok := link.(*Veth); ok { + case *Veth: data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil) nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC) - nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(veth.PeerName)) + nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(link.PeerName)) if base.TxQLen >= 0 { nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) } @@ -835,35 +919,37 @@ func (h *Handle) linkModify(link Link, flags int) error { nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) } - } else if vxlan, ok := link.(*Vxlan); ok { - addVxlanAttrs(vxlan, linkInfo) - } else if bond, ok := link.(*Bond); ok { - addBondAttrs(bond, linkInfo) - } else if ipv, ok := link.(*IPVlan); ok { + case *Vxlan: + addVxlanAttrs(link, linkInfo) + case *Bond: + addBondAttrs(link, linkInfo) + case *IPVlan: data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(ipv.Mode))) - } else if macv, ok := link.(*Macvlan); ok { - if macv.Mode != MACVLAN_MODE_DEFAULT { + nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + case *Macvlan: + if link.Mode != MACVLAN_MODE_DEFAULT { data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode])) + nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) } - } else if macv, ok := link.(*Macvtap); ok { - if macv.Mode != MACVLAN_MODE_DEFAULT { + case *Macvtap: + if link.Mode != MACVLAN_MODE_DEFAULT { data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode])) + nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) } - } else if gretap, ok := link.(*Gretap); ok { - addGretapAttrs(gretap, linkInfo) - } else if iptun, ok := link.(*Iptun); ok { - addIptunAttrs(iptun, linkInfo) - } else if vti, ok := link.(*Vti); ok { - addVtiAttrs(vti, linkInfo) - } else if vrf, ok := link.(*Vrf); ok { - addVrfAttrs(vrf, linkInfo) - } else if bridge, ok := link.(*Bridge); ok { - addBridgeAttrs(bridge, linkInfo) - } else if gtp, ok := link.(*GTP); ok { - addGTPAttrs(gtp, linkInfo) + case *Gretap: + addGretapAttrs(link, linkInfo) + case *Iptun: + addIptunAttrs(link, linkInfo) + case *Gretun: + addGretunAttrs(link, linkInfo) + case *Vti: + addVtiAttrs(link, linkInfo) + case *Vrf: + addVrfAttrs(link, linkInfo) + case *Bridge: + addBridgeAttrs(link, linkInfo) + case *GTP: + addGTPAttrs(link, linkInfo) } req.AddData(linkInfo) @@ -1093,6 +1179,8 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { link = &Gretap{} case "ipip": link = &Iptun{} + case "gre": + link = &Gretun{} case "vti": link = &Vti{} case "vrf": @@ -1124,6 +1212,8 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { parseGretapData(link, data) case "ipip": parseIptunData(link, data) + case "gre": + parseGretunData(link, data) case "vti": parseVtiData(link, data) case "vrf": @@ -1178,6 +1268,8 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { } case syscall.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) + case nl.IFLA_LINK_NETNSID: + base.NetNsID = int(native.Uint32(attr.Value[0:4])) } } @@ -1239,16 +1331,34 @@ type LinkUpdate struct { // LinkSubscribe takes a chan down which notifications will be sent // when links change. Close the 'done' chan to stop subscription. func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribe(netns.None(), netns.None(), ch, done) + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil) } // LinkSubscribeAt works like LinkSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribe(ns, netns.None(), ch, done) + return linkSubscribeAt(ns, netns.None(), ch, done, nil) +} + +// LinkSubscribeOptions contains a set of options to use with +// LinkSubscribeWithOptions. +type LinkSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) +} + +// LinkSubscribeWithOptions work like LinkSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, options LinkSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback) } -func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error)) error { s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) if err != nil { return err @@ -1264,12 +1374,18 @@ func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-cha for { msgs, err := s.Receive() if err != nil { + if cberr != nil { + cberr(err) + } return } for _, m := range msgs { ifmsg := nl.DeserializeIfInfomsg(m.Data) link, err := LinkDeserialize(&m.Header, m.Data) if err != nil { + if cberr != nil { + cberr(err) + } return } ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: m.Header, Link: link} @@ -1363,6 +1479,33 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { return nil } +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func LinkSetTxQLen(link Link, qlen int) error { + return pkgHandle.LinkSetTxQLen(link, qlen) +} + +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func (h *Handle) LinkSetTxQLen(link Link, qlen int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(qlen)) + + data := nl.NewRtAttr(syscall.IFLA_TXQLEN, b) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { vlan := link.(*Vlan) for _, datum := range data { @@ -1407,6 +1550,8 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan.UDPCSum = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_GBP: vxlan.GBP = true + case nl.IFLA_VXLAN_FLOWBASED: + vxlan.FlowBased = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_AGEING: vxlan.Age = int(native.Uint32(datum.Value[0:4])) vxlan.NoAge = vxlan.Age == 0 @@ -1547,6 +1692,12 @@ func linkFlags(rawFlags uint32) net.Flags { func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + if gretap.FlowBased { + // In flow based mode, no other attributes need to be configured + nl.NewRtAttrChild(data, nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + return + } + ip := gretap.Local.To4() if ip != nil { nl.NewRtAttrChild(data, nl.IFLA_GRE_LOCAL, []byte(ip)) @@ -1613,6 +1764,69 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { gre.EncapType = native.Uint16(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_FLAGS: gre.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = int8(datum.Value[0]) != 0 + } + } +} + +func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + + ip := gre.Local.To4() + if ip != nil { + nl.NewRtAttrChild(data, nl.IFLA_GRE_LOCAL, []byte(ip)) + } + ip = gre.Remote.To4() + if ip != nil { + nl.NewRtAttrChild(data, nl.IFLA_GRE_REMOTE, []byte(ip)) + } + + if gre.IKey != 0 { + nl.NewRtAttrChild(data, nl.IFLA_GRE_IKEY, htonl(gre.IKey)) + gre.IFlags |= uint16(nl.GRE_KEY) + } + + if gre.OKey != 0 { + nl.NewRtAttrChild(data, nl.IFLA_GRE_OKEY, htonl(gre.OKey)) + gre.OFlags |= uint16(nl.GRE_KEY) + } + + nl.NewRtAttrChild(data, nl.IFLA_GRE_IFLAGS, htons(gre.IFlags)) + nl.NewRtAttrChild(data, nl.IFLA_GRE_OFLAGS, htons(gre.OFlags)) + + if gre.Link != 0 { + nl.NewRtAttrChild(data, nl.IFLA_GRE_LINK, nl.Uint32Attr(gre.Link)) + } + + nl.NewRtAttrChild(data, nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gre.PMtuDisc)) + nl.NewRtAttrChild(data, nl.IFLA_GRE_TTL, nl.Uint8Attr(gre.Ttl)) + nl.NewRtAttrChild(data, nl.IFLA_GRE_TOS, nl.Uint8Attr(gre.Tos)) +} + +func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { + gre := link.(*Gretun) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GRE_OKEY: + gre.IKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_IKEY: + gre.OKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_LOCAL: + gre.Local = net.IP(datum.Value[0:4]) + case nl.IFLA_GRE_REMOTE: + gre.Remote = net.IP(datum.Value[0:4]) + case nl.IFLA_GRE_IFLAGS: + gre.IFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_OFLAGS: + gre.OFlags = ntohs(datum.Value[0:2]) + + case nl.IFLA_GRE_TTL: + gre.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GRE_TOS: + gre.Tos = uint8(datum.Value[0]) + case nl.IFLA_GRE_PMTUDISC: + gre.PMtuDisc = uint8(datum.Value[0]) } } } @@ -1630,8 +1844,10 @@ func addXdpAttrs(xdp *LinkXdp, req *nl.NetlinkRequest) { b := make([]byte, 4) native.PutUint32(b, uint32(xdp.Fd)) nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FD, b) - native.PutUint32(b, xdp.Flags) - nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FLAGS, b) + if xdp.Flags != 0 { + native.PutUint32(b, xdp.Flags) + nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FLAGS, b) + } req.AddData(attrs) } @@ -1649,6 +1865,8 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { xdp.Attached = attr.Value[0] != 0 case nl.IFLA_XDP_FLAGS: xdp.Flags = native.Uint32(attr.Value[0:4]) + case nl.IFLA_XDP_PROG_ID: + xdp.ProgId = native.Uint32(attr.Value[0:4]) } } return xdp, nil diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go index 0e5eb90c9e..6a6f71ce86 100644 --- a/vendor/github.com/vishvananda/netlink/neigh.go +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -14,6 +14,7 @@ type Neigh struct { Flags int IP net.IP HardwareAddr net.HardwareAddr + LLIPAddr net.IP //Used in the case of NHRP } // String returns $ip/$hwaddr $label diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go index f069db2573..5edc8b4125 100644 --- a/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -128,6 +128,7 @@ func (h *Handle) NeighDel(neigh *Neigh) error { func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { var family int + if neigh.Family > 0 { family = neigh.Family } else { @@ -151,7 +152,10 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { dstData := nl.NewRtAttr(NDA_DST, ipData) req.AddData(dstData) - if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { + if neigh.LLIPAddr != nil { + llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) + req.AddData(llIPData) + } else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) req.AddData(hwData) } @@ -237,12 +241,33 @@ func NeighDeserialize(m []byte) (*Neigh, error) { return nil, err } + // This should be cached for perfomance + // once per table dump + link, err := LinkByIndex(neigh.LinkIndex) + if err != nil { + return nil, err + } + encapType := link.Attrs().EncapType + for _, attr := range attrs { switch attr.Attr.Type { case NDA_DST: neigh.IP = net.IP(attr.Value) case NDA_LLADDR: - neigh.HardwareAddr = net.HardwareAddr(attr.Value) + // BUG: Is this a bug in the netlink library? + // #define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) + // #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + attrLen := attr.Attr.Len - syscall.SizeofRtAttr + if attrLen == 4 && (encapType == "ipip" || + encapType == "sit" || + encapType == "gre") { + neigh.LLIPAddr = net.IP(attr.Value) + } else if attrLen == 16 && + encapType == "tunnel6" { + neigh.IP = net.IP(attr.Value) + } else { + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } } } diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 2d57c16d74..86111b92ce 100644 --- a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -108,6 +108,10 @@ func LinkSetFlood(link Link, mode bool) error { return ErrNotImplemented } +func LinkSetTxQLen(link Link, qlen int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go new file mode 100644 index 0000000000..6c0d333387 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go @@ -0,0 +1,74 @@ +package nl + +import ( + "fmt" + "unsafe" +) + +const ( + SizeofBridgeVlanInfo = 0x04 +) + +/* Bridge Flags */ +const ( + BRIDGE_FLAGS_MASTER = iota /* Bridge command to/from master */ + BRIDGE_FLAGS_SELF /* Bridge command to/from lowerdev */ +) + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] + * } + */ +const ( + IFLA_BRIDGE_FLAGS = iota + IFLA_BRIDGE_MODE + IFLA_BRIDGE_VLAN_INFO +) + +const ( + BRIDGE_VLAN_INFO_MASTER = 1 << iota + BRIDGE_VLAN_INFO_PVID + BRIDGE_VLAN_INFO_UNTAGGED + BRIDGE_VLAN_INFO_RANGE_BEGIN + BRIDGE_VLAN_INFO_RANGE_END +) + +// struct bridge_vlan_info { +// __u16 flags; +// __u16 vid; +// }; + +type BridgeVlanInfo struct { + Flags uint16 + Vid uint16 +} + +func (b *BridgeVlanInfo) Serialize() []byte { + return (*(*[SizeofBridgeVlanInfo]byte)(unsafe.Pointer(b)))[:] +} + +func DeserializeBridgeVlanInfo(b []byte) *BridgeVlanInfo { + return (*BridgeVlanInfo)(unsafe.Pointer(&b[0:SizeofBridgeVlanInfo][0])) +} + +func (b *BridgeVlanInfo) PortVID() bool { + return b.Flags&BRIDGE_VLAN_INFO_PVID > 0 +} + +func (b *BridgeVlanInfo) EngressUntag() bool { + return b.Flags&BRIDGE_VLAN_INFO_UNTAGGED > 0 +} + +func (b *BridgeVlanInfo) String() string { + return fmt.Sprintf("%+v", *b) +} + +/* New extended info filters for IFLA_EXT_MASK */ +const ( + RTEXT_FILTER_VF = 1 << iota + RTEXT_FILTER_BRVLAN + RTEXT_FILTER_BRVLAN_COMPRESSED +) diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 6692b53ed9..380cc5967b 100644 --- a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -79,8 +79,8 @@ const ( CTA_TUPLE_ORIG = 1 CTA_TUPLE_REPLY = 2 CTA_STATUS = 3 - CTA_TIMEOUT = 8 - CTA_MARK = 9 + CTA_TIMEOUT = 7 + CTA_MARK = 8 CTA_PROTOINFO = 4 ) diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index f7b9575919..9ae65a12c2 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -231,7 +231,8 @@ const ( * on/off switch */ IFLA_VF_STATS /* network device statistics */ - IFLA_VF_MAX = IFLA_VF_STATS + IFLA_VF_TRUST /* Trust state of VF */ + IFLA_VF_MAX = IFLA_VF_TRUST ) const ( @@ -259,6 +260,7 @@ const ( SizeofVfSpoofchk = 0x08 SizeofVfLinkState = 0x08 SizeofVfRssQueryEn = 0x08 + SizeofVfTrust = 0x08 ) // struct ifla_vf_mac { @@ -419,12 +421,42 @@ func (msg *VfRssQueryEn) Serialize() []byte { return (*(*[SizeofVfRssQueryEn]byte)(unsafe.Pointer(msg)))[:] } +// struct ifla_vf_trust { +// __u32 vf; +// __u32 setting; +// }; + +type VfTrust struct { + Vf uint32 + Setting uint32 +} + +func (msg *VfTrust) Len() int { + return SizeofVfTrust +} + +func DeserializeVfTrust(b []byte) *VfTrust { + return (*VfTrust)(unsafe.Pointer(&b[0:SizeofVfTrust][0])) +} + +func (msg *VfTrust) Serialize() []byte { + return (*(*[SizeofVfTrust]byte)(unsafe.Pointer(msg)))[:] +} + +const ( + XDP_FLAGS_UPDATE_IF_NOEXIST = 1 << iota + XDP_FLAGS_SKB_MODE + XDP_FLAGS_DRV_MODE + XDP_FLAGS_MASK = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE +) + const ( IFLA_XDP_UNSPEC = iota IFLA_XDP_FD /* fd of xdp program to attach, or -1 to remove */ IFLA_XDP_ATTACHED /* read-only bool indicating if prog is attached */ IFLA_XDP_FLAGS /* xdp prog related flags */ - IFLA_XDP_MAX = IFLA_XDP_FLAGS + IFLA_XDP_PROG_ID /* xdp prog id */ + IFLA_XDP_MAX = IFLA_XDP_PROG_ID ) const ( diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index 1329acd864..72f7f6af3c 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -621,6 +621,20 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { return syscall.ParseNetlinkMessage(rb) } +// SetSendTimeout allows to set a send timeout on the socket +func (s *NetlinkSocket) SetSendTimeout(timeout *syscall.Timeval) error { + // Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine + // remains stuck on a send on a closed fd + return syscall.SetsockoptTimeval(int(s.fd), syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, timeout) +} + +// SetReceiveTimeout allows to set a receive timeout on the socket +func (s *NetlinkSocket) SetReceiveTimeout(timeout *syscall.Timeval) error { + // Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine + // remains stuck on a recvmsg on a closed fd + return syscall.SetsockoptTimeval(int(s.fd), syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, timeout) +} + func (s *NetlinkSocket) GetPid() (uint32, error) { fd := int(atomic.LoadInt32(&s.fd)) lsa, err := syscall.Getsockname(fd) diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go index 2c0deddb32..1123396e47 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -160,71 +160,73 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if prio, ok := qdisc.(*Prio); ok { + + switch qdisc := qdisc.(type) { + case *Prio: tcmap := nl.TcPrioMap{ - Bands: int32(prio.Bands), - Priomap: prio.PriorityMap, + Bands: int32(qdisc.Bands), + Priomap: qdisc.PriorityMap, } options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) - } else if tbf, ok := qdisc.(*Tbf); ok { + case *Tbf: opt := nl.TcTbfQopt{} - opt.Rate.Rate = uint32(tbf.Rate) - opt.Peakrate.Rate = uint32(tbf.Peakrate) - opt.Limit = tbf.Limit - opt.Buffer = tbf.Buffer + opt.Rate.Rate = uint32(qdisc.Rate) + opt.Peakrate.Rate = uint32(qdisc.Peakrate) + opt.Limit = qdisc.Limit + opt.Buffer = qdisc.Buffer nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) - if tbf.Rate >= uint64(1<<32) { - nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate)) + if qdisc.Rate >= uint64(1<<32) { + nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate)) } - if tbf.Peakrate >= uint64(1<<32) { - nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate)) + if qdisc.Peakrate >= uint64(1<<32) { + nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate)) } - if tbf.Peakrate > 0 { - nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst)) + if qdisc.Peakrate > 0 { + nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst)) } - } else if htb, ok := qdisc.(*Htb); ok { + case *Htb: opt := nl.TcHtbGlob{} - opt.Version = htb.Version - opt.Rate2Quantum = htb.Rate2Quantum - opt.Defcls = htb.Defcls + opt.Version = qdisc.Version + opt.Rate2Quantum = qdisc.Rate2Quantum + opt.Defcls = qdisc.Defcls // TODO: Handle Debug properly. For now default to 0 - opt.Debug = htb.Debug - opt.DirectPkts = htb.DirectPkts + opt.Debug = qdisc.Debug + opt.DirectPkts = qdisc.DirectPkts nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize()) // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) - } else if netem, ok := qdisc.(*Netem); ok { + case *Netem: opt := nl.TcNetemQopt{} - opt.Latency = netem.Latency - opt.Limit = netem.Limit - opt.Loss = netem.Loss - opt.Gap = netem.Gap - opt.Duplicate = netem.Duplicate - opt.Jitter = netem.Jitter + opt.Latency = qdisc.Latency + opt.Limit = qdisc.Limit + opt.Loss = qdisc.Loss + opt.Gap = qdisc.Gap + opt.Duplicate = qdisc.Duplicate + opt.Jitter = qdisc.Jitter options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) // Correlation corr := nl.TcNetemCorr{} - corr.DelayCorr = netem.DelayCorr - corr.LossCorr = netem.LossCorr - corr.DupCorr = netem.DuplicateCorr + corr.DelayCorr = qdisc.DelayCorr + corr.LossCorr = qdisc.LossCorr + corr.DupCorr = qdisc.DuplicateCorr if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 { nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize()) } // Corruption corruption := nl.TcNetemCorrupt{} - corruption.Probability = netem.CorruptProb - corruption.Correlation = netem.CorruptCorr + corruption.Probability = qdisc.CorruptProb + corruption.Correlation = qdisc.CorruptCorr if corruption.Probability > 0 { nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize()) } // Reorder reorder := nl.TcNetemReorder{} - reorder.Probability = netem.ReorderProb - reorder.Correlation = netem.ReorderCorr + reorder.Probability = qdisc.ReorderProb + reorder.Correlation = qdisc.ReorderCorr if reorder.Probability > 0 { nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize()) } - } else if _, ok := qdisc.(*Ingress); ok { + case *Ingress: // ingress filters must use the proper handle if qdisc.Attrs().Parent != HANDLE_INGRESS { return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go index 03ac4b2391..68c6a2230d 100644 --- a/vendor/github.com/vishvananda/netlink/route.go +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -16,6 +16,7 @@ type Destination interface { Decode([]byte) error Encode() ([]byte, error) String() string + Equal(Destination) bool } type Encap interface { @@ -23,6 +24,7 @@ type Encap interface { Decode([]byte) error Encode() ([]byte, error) String() string + Equal(Encap) bool } // Route represents a netlink route. @@ -72,6 +74,25 @@ func (r Route) String() string { return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } +func (r Route) Equal(x Route) bool { + return r.LinkIndex == x.LinkIndex && + r.ILinkIndex == x.ILinkIndex && + r.Scope == x.Scope && + ipNetEqual(r.Dst, x.Dst) && + r.Src.Equal(x.Src) && + r.Gw.Equal(x.Gw) && + nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && + r.Protocol == x.Protocol && + r.Priority == x.Priority && + r.Table == x.Table && + r.Type == x.Type && + r.Tos == x.Tos && + r.Flags == x.Flags && + (r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) && + (r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) && + (r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap))) +} + func (r *Route) SetFlag(flag NextHopFlag) { r.Flags |= int(flag) } @@ -110,7 +131,46 @@ func (n *NexthopInfo) String() string { elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap)) } elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1)) - elems = append(elems, fmt.Sprintf("Gw: %d", n.Gw)) + elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags())) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } + +func (n NexthopInfo) Equal(x NexthopInfo) bool { + return n.LinkIndex == x.LinkIndex && + n.Hops == x.Hops && + n.Gw.Equal(x.Gw) && + n.Flags == x.Flags && + (n.NewDst == x.NewDst || (n.NewDst != nil && n.NewDst.Equal(x.NewDst))) && + (n.Encap == x.Encap || (n.Encap != nil && n.Encap.Equal(x.Encap))) +} + +type nexthopInfoSlice []*NexthopInfo + +func (n nexthopInfoSlice) Equal(x []*NexthopInfo) bool { + if len(n) != len(x) { + return false + } + for i := range n { + if n[i] == nil || x[i] == nil { + return false + } + if !n[i].Equal(*x[i]) { + return false + } + } + return true +} + +// ipNetEqual returns true iff both IPNet are equal +func ipNetEqual(ipn1 *net.IPNet, ipn2 *net.IPNet) bool { + if ipn1 == ipn2 { + return true + } + if ipn1 == nil || ipn2 == nil { + return false + } + m1, _ := ipn1.Mask.Size() + m2, _ := ipn2.Mask.Size() + return m1 == m2 && ipn1.IP.Equal(ipn2.IP) +} diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go index cd739e7146..9234c6986d 100644 --- a/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -86,6 +86,34 @@ func (d *MPLSDestination) String() string { return strings.Join(s, "/") } +func (d *MPLSDestination) Equal(x Destination) bool { + o, ok := x.(*MPLSDestination) + if !ok { + return false + } + if d == nil && o == nil { + return true + } + if d == nil || o == nil { + return false + } + if d.Labels == nil && o.Labels == nil { + return true + } + if d.Labels == nil || o.Labels == nil { + return false + } + if len(d.Labels) != len(o.Labels) { + return false + } + for i := range d.Labels { + if d.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + type MPLSEncap struct { Labels []int } @@ -129,6 +157,34 @@ func (e *MPLSEncap) String() string { return strings.Join(s, "/") } +func (e *MPLSEncap) Equal(x Encap) bool { + o, ok := x.(*MPLSEncap) + if !ok { + return false + } + if e == nil && o == nil { + return true + } + if e == nil || o == nil { + return false + } + if e.Labels == nil && o.Labels == nil { + return true + } + if e.Labels == nil || o.Labels == nil { + return false + } + if len(e.Labels) != len(o.Labels) { + return false + } + for i := range e.Labels { + if e.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + // RouteAdd will add a route to the system. // Equivalent to: `ip route add $route` func RouteAdd(route *Route) error { @@ -421,19 +477,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) continue case filterMask&RT_FILTER_DST != 0: if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { - if filter.Dst == nil { - if route.Dst != nil { - continue - } - } else { - if route.Dst == nil { - continue - } - aMaskLen, aMaskBits := route.Dst.Mask.Size() - bMaskLen, bMaskBits := filter.Dst.Mask.Size() - if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) { - continue - } + if !ipNetEqual(route.Dst, filter.Dst) { + continue } } } @@ -633,16 +678,34 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { // RouteSubscribe takes a chan down which notifications will be sent // when routes are added or deleted. Close the 'done' chan to stop subscription. func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(netns.None(), netns.None(), ch, done) + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil) } // RouteSubscribeAt works like RouteSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(ns, netns.None(), ch, done) + return routeSubscribeAt(ns, netns.None(), ch, done, nil) } -func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { +// RouteSubscribeOptions contains a set of options to use with +// RouteSubscribeWithOptions. +type RouteSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) +} + +// RouteSubscribeWithOptions work like RouteSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, options RouteSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback) +} + +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error)) error { s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) if err != nil { return err @@ -658,11 +721,17 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < for { msgs, err := s.Receive() if err != nil { + if cberr != nil { + cberr(err) + } return } for _, m := range msgs { route, err := deserializeRoute(m.Data) if err != nil { + if cberr != nil { + cberr(err) + } return } ch <- RouteUpdate{Type: m.Header.Type, Route: route} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go index f0243defd7..e4d9168d6c 100644 --- a/vendor/github.com/vishvananda/netlink/rule.go +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -8,6 +8,7 @@ import ( // Rule represents a netlink rule. type Rule struct { Priority int + Family int Table int Mark int Mask int diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go index f9cdc855f1..cbd91a56bb 100644 --- a/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -37,6 +37,9 @@ func (h *Handle) RuleDel(rule *Rule) error { func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { msg := nl.NewRtMsg() msg.Family = syscall.AF_INET + if rule.Family != 0 { + msg.Family = uint8(rule.Family) + } var dstFamily uint8 var rtAttrs []*nl.RtAttr