From f8f29622a4b11b288da7a59df7778498b6f67597 Mon Sep 17 00:00:00 2001 From: Ruidong Cao Date: Fri, 19 Oct 2018 05:18:00 +0800 Subject: [PATCH 1/4] virtcontainers: refactor hotplug qmp functions Refactor these functions so different types of endpoints can use a unified function to hotplug nics. Fixes #731 Signed-off-by: Ruidong Cao --- virtcontainers/bridgedmacvlan_endpoint.go | 5 ++ .../bridgedmacvlan_endpoint_test.go | 12 ++-- virtcontainers/endpoint.go | 1 + virtcontainers/macvtap_endpoint.go | 5 ++ virtcontainers/network.go | 27 +++++--- virtcontainers/physical_endpoint.go | 5 ++ virtcontainers/qemu.go | 63 +++++++++++-------- virtcontainers/qemu_arch_base_test.go | 12 ++-- virtcontainers/sandbox.go | 3 + virtcontainers/veth_endpoint.go | 5 ++ virtcontainers/veth_endpoint_test.go | 24 ++++--- virtcontainers/vhostuser_endpoint.go | 5 ++ 12 files changed, 111 insertions(+), 56 deletions(-) diff --git a/virtcontainers/bridgedmacvlan_endpoint.go b/virtcontainers/bridgedmacvlan_endpoint.go index 705ff9d2cd..9ea0a30663 100644 --- a/virtcontainers/bridgedmacvlan_endpoint.go +++ b/virtcontainers/bridgedmacvlan_endpoint.go @@ -71,6 +71,11 @@ func (endpoint *BridgedMacvlanEndpoint) PciAddr() string { return endpoint.PCIAddr } +// SetPciAddr sets the PCI address of the endpoint. +func (endpoint *BridgedMacvlanEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + // NetworkPair returns the network pair of the endpoint. 
func (endpoint *BridgedMacvlanEndpoint) NetworkPair() *NetworkInterfacePair { return &endpoint.NetPair diff --git a/virtcontainers/bridgedmacvlan_endpoint_test.go b/virtcontainers/bridgedmacvlan_endpoint_test.go index 5bee07e750..0cc4a94012 100644 --- a/virtcontainers/bridgedmacvlan_endpoint_test.go +++ b/virtcontainers/bridgedmacvlan_endpoint_test.go @@ -16,15 +16,17 @@ func TestCreateBridgedMacvlanEndpoint(t *testing.T) { expected := &BridgedMacvlanEndpoint{ NetPair: NetworkInterfacePair{ - ID: "uniqueTestID-4", - Name: "br4_kata", + TapInterface: TapInterface{ + ID: "uniqueTestID-4", + Name: "br4_kata", + TAPIface: NetworkInterface{ + Name: "tap4_kata", + }, + }, VirtIface: NetworkInterface{ Name: "eth4", HardAddr: macAddr.String(), }, - TAPIface: NetworkInterface{ - Name: "tap4_kata", - }, NetInterworkingModel: DefaultNetInterworkingModel, }, EndpointType: BridgedMacvlanEndpointType, diff --git a/virtcontainers/endpoint.go b/virtcontainers/endpoint.go index af5b39cbfb..60a5913e51 100644 --- a/virtcontainers/endpoint.go +++ b/virtcontainers/endpoint.go @@ -19,6 +19,7 @@ type Endpoint interface { NetworkPair() *NetworkInterfacePair SetProperties(NetworkInfo) + SetPciAddr(string) Attach(hypervisor) error Detach(netNsCreated bool, netNsPath string) error HotAttach(h hypervisor) error diff --git a/virtcontainers/macvtap_endpoint.go b/virtcontainers/macvtap_endpoint.go index 0abba7d582..d35d9c70cb 100644 --- a/virtcontainers/macvtap_endpoint.go +++ b/virtcontainers/macvtap_endpoint.go @@ -93,6 +93,11 @@ func (endpoint *MacvtapEndpoint) PciAddr() string { return endpoint.PCIAddr } +// SetPciAddr sets the PCI address of the endpoint. +func (endpoint *MacvtapEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + // NetworkPair returns the network pair of the endpoint. 
func (endpoint *MacvtapEndpoint) NetworkPair() *NetworkInterfacePair { return nil diff --git a/virtcontainers/network.go b/virtcontainers/network.go index 7510dac419..48eaa254ae 100644 --- a/virtcontainers/network.go +++ b/virtcontainers/network.go @@ -122,15 +122,20 @@ type NetworkInterface struct { Addrs []netlink.Addr } +// TapInterface defines a tap nic. +type TapInterface struct { + ID string + Name string + TAPIface NetworkInterface + VMFds []*os.File + VhostFds []*os.File +} + // NetworkInterfacePair defines a pair between VM and virtual network interfaces. type NetworkInterfacePair struct { - ID string - Name string + TapInterface VirtIface NetworkInterface - TAPIface NetworkInterface NetInterworkingModel - VMFds []*os.File - VhostFds []*os.File } // NetworkConfig is the network configuration related to a network. @@ -962,15 +967,17 @@ func createNetworkInterfacePair(idx int, ifName string, interworkingModel NetInt } netPair := NetworkInterfacePair{ - ID: uniqueID, - Name: fmt.Sprintf("br%d_kata", idx), + TapInterface: TapInterface{ + ID: uniqueID, + Name: fmt.Sprintf("br%d_kata", idx), + TAPIface: NetworkInterface{ + Name: fmt.Sprintf("tap%d_kata", idx), + }, + }, VirtIface: NetworkInterface{ Name: fmt.Sprintf("eth%d", idx), HardAddr: randomMacAddr, }, - TAPIface: NetworkInterface{ - Name: fmt.Sprintf("tap%d_kata", idx), - }, NetInterworkingModel: interworkingModel, } diff --git a/virtcontainers/physical_endpoint.go b/virtcontainers/physical_endpoint.go index 57c5f510eb..dffc19cf84 100644 --- a/virtcontainers/physical_endpoint.go +++ b/virtcontainers/physical_endpoint.go @@ -53,6 +53,11 @@ func (endpoint *PhysicalEndpoint) PciAddr() string { return endpoint.PCIAddr } +// SetPciAddr sets the PCI address of the endpoint. +func (endpoint *PhysicalEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + // SetProperties sets the properties of the physical endpoint. 
func (endpoint *PhysicalEndpoint) SetProperties(properties NetworkInfo) { endpoint.EndpointProperties = properties diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index a2ea921304..e35f6b070f 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -838,66 +838,77 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) error { return nil } -func (q *qemu) hotplugMacvtap(drive *VethEndpoint) error { +func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error { var ( VMFdNames []string VhostFdNames []string ) - for i, VMFd := range drive.NetPair.VMFds { + for i, VMFd := range VMFds { fdName := fmt.Sprintf("fd%d", i) - err := q.qmpMonitorCh.qmp.ExecuteGetFD(q.qmpMonitorCh.ctx, fdName, VMFd) - if err != nil { + if err := q.qmpMonitorCh.qmp.ExecuteGetFD(q.qmpMonitorCh.ctx, fdName, VMFd); err != nil { return err } VMFdNames = append(VMFdNames, fdName) } - for i, VhostFd := range drive.NetPair.VhostFds { + for i, VhostFd := range VhostFds { fdName := fmt.Sprintf("vhostfd%d", i) - err := q.qmpMonitorCh.qmp.ExecuteGetFD(q.qmpMonitorCh.ctx, fdName, VhostFd) - if err != nil { + if err := q.qmpMonitorCh.qmp.ExecuteGetFD(q.qmpMonitorCh.ctx, fdName, VhostFd); err != nil { return err } VhostFdNames = append(VhostFdNames, fdName) } - return q.qmpMonitorCh.qmp.ExecuteNetdevAddByFds(q.qmpMonitorCh.ctx, "tap", drive.NetPair.Name, VMFdNames, VhostFdNames) + return q.qmpMonitorCh.qmp.ExecuteNetdevAddByFds(q.qmpMonitorCh.ctx, "tap", name, VMFdNames, VhostFdNames) } -func (q *qemu) hotplugNetDevice(drive *VethEndpoint, op operation) error { +func (q *qemu) hotplugNetDevice(endpoint Endpoint, op operation) error { err := q.qmpSetup() if err != nil { return err } - devID := "virtio-" + drive.NetPair.ID + var tap TapInterface if op == addDevice { - switch drive.NetPair.NetInterworkingModel { - case NetXConnectBridgedModel: - if err := q.qmpMonitorCh.qmp.ExecuteNetdevAdd(q.qmpMonitorCh.ctx, "tap", drive.NetPair.Name, 
drive.NetPair.TAPIface.Name, "no", "no", int(q.config.NumVCPUs)); err != nil { - return err - } - case NetXConnectMacVtapModel: - if err := q.hotplugMacvtap(drive); err != nil { - return err - } + switch endpoint.Type() { + case VethEndpointType: + drive := endpoint.(*VethEndpoint) + tap = drive.NetPair.TapInterface default: - return fmt.Errorf("this net interworking model is not supported") + return fmt.Errorf("this endpoint is not supported") } - addr, bridge, err := q.addDeviceToBridge(drive.NetPair.ID) + + if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil { + return err + } + + addr, bridge, err := q.addDeviceToBridge(tap.ID) if err != nil { return err } - drive.PCIAddr = fmt.Sprintf("%02x/%s", bridge.Addr, addr) - if err = q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, drive.NetPair.Name, devID, drive.NetPair.TAPIface.HardAddr, addr, bridge.ID, romFile, int(q.config.NumVCPUs)); err != nil { + pciAddr := fmt.Sprintf("%02x/%s", bridge.Addr, addr) + endpoint.SetPciAddr(pciAddr) + + devID := "virtio-" + tap.ID + if err = q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs)); err != nil { return err } } else { - if err := q.removeDeviceFromBridge(drive.NetPair.ID); err != nil { + switch endpoint.Type() { + case VethEndpointType: + drive := endpoint.(*VethEndpoint) + tap = drive.NetPair.TapInterface + default: + return fmt.Errorf("this endpoint is not supported") + } + + if err := q.removeDeviceFromBridge(tap.ID); err != nil { return err } + + devID := "virtio-" + tap.ID if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, devID); err != nil { return err } - if err := q.qmpMonitorCh.qmp.ExecuteNetdevDel(q.qmpMonitorCh.ctx, drive.NetPair.Name); err != nil { + if err := q.qmpMonitorCh.qmp.ExecuteNetdevDel(q.qmpMonitorCh.ctx, tap.Name); err != nil { return err } } @@ -919,7 +930,7 @@ func (q 
*qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operati memdev := devInfo.(*memoryDevice) return q.hotplugMemory(memdev, op) case netDev: - device := devInfo.(*VethEndpoint) + device := devInfo.(Endpoint) return nil, q.hotplugNetDevice(device, op) default: return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) diff --git a/virtcontainers/qemu_arch_base_test.go b/virtcontainers/qemu_arch_base_test.go index 02784c8bac..dcca194ea5 100644 --- a/virtcontainers/qemu_arch_base_test.go +++ b/virtcontainers/qemu_arch_base_test.go @@ -443,15 +443,17 @@ func TestQemuArchBaseAppendNetwork(t *testing.T) { macvlanEp := &BridgedMacvlanEndpoint{ NetPair: NetworkInterfacePair{ - ID: "uniqueTestID-4", - Name: "br4_kata", + TapInterface: TapInterface{ + ID: "uniqueTestID-4", + Name: "br4_kata", + TAPIface: NetworkInterface{ + Name: "tap4_kata", + }, + }, VirtIface: NetworkInterface{ Name: "eth4", HardAddr: macAddr.String(), }, - TAPIface: NetworkInterface{ - Name: "tap4_kata", - }, NetInterworkingModel: DefaultNetInterworkingModel, }, EndpointType: BridgedMacvlanEndpointType, diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index c207bd1718..da85cebc71 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1142,6 +1142,9 @@ func (s *Sandbox) RemoveInterface(inf *grpc.Interface) (*grpc.Interface, error) return inf, err } s.networkNS.Endpoints = append(s.networkNS.Endpoints[:i], s.networkNS.Endpoints[i+1:]...) + if err := s.storage.storeSandboxNetwork(s.id, s.networkNS); err != nil { + return inf, err + } break } } diff --git a/virtcontainers/veth_endpoint.go b/virtcontainers/veth_endpoint.go index aeca9ac16f..3246151542 100644 --- a/virtcontainers/veth_endpoint.go +++ b/virtcontainers/veth_endpoint.go @@ -70,6 +70,11 @@ func (endpoint *VethEndpoint) PciAddr() string { return endpoint.PCIAddr } +// SetPciAddr sets the PCI address of the endpoint. 
+func (endpoint *VethEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + // NetworkPair returns the network pair of the endpoint. func (endpoint *VethEndpoint) NetworkPair() *NetworkInterfacePair { return &endpoint.NetPair diff --git a/virtcontainers/veth_endpoint_test.go b/virtcontainers/veth_endpoint_test.go index 211e409aed..ae62ddd525 100644 --- a/virtcontainers/veth_endpoint_test.go +++ b/virtcontainers/veth_endpoint_test.go @@ -16,15 +16,17 @@ func TestCreateVethNetworkEndpoint(t *testing.T) { expected := &VethEndpoint{ NetPair: NetworkInterfacePair{ - ID: "uniqueTestID-4", - Name: "br4_kata", + TapInterface: TapInterface{ + ID: "uniqueTestID-4", + Name: "br4_kata", + TAPIface: NetworkInterface{ + Name: "tap4_kata", + }, + }, VirtIface: NetworkInterface{ Name: "eth4", HardAddr: macAddr.String(), }, - TAPIface: NetworkInterface{ - Name: "tap4_kata", - }, NetInterworkingModel: DefaultNetInterworkingModel, }, EndpointType: VethEndpointType, @@ -51,15 +53,17 @@ func TestCreateVethNetworkEndpointChooseIfaceName(t *testing.T) { expected := &VethEndpoint{ NetPair: NetworkInterfacePair{ - ID: "uniqueTestID-4", - Name: "br4_kata", + TapInterface: TapInterface{ + ID: "uniqueTestID-4", + Name: "br4_kata", + TAPIface: NetworkInterface{ + Name: "tap4_kata", + }, + }, VirtIface: NetworkInterface{ Name: "eth1", HardAddr: macAddr.String(), }, - TAPIface: NetworkInterface{ - Name: "tap4_kata", - }, NetInterworkingModel: DefaultNetInterworkingModel, }, EndpointType: VethEndpointType, diff --git a/virtcontainers/vhostuser_endpoint.go b/virtcontainers/vhostuser_endpoint.go index 3207f3ae49..4960dba1be 100644 --- a/virtcontainers/vhostuser_endpoint.go +++ b/virtcontainers/vhostuser_endpoint.go @@ -62,6 +62,11 @@ func (endpoint *VhostUserEndpoint) PciAddr() string { return endpoint.PCIAddr } +// SetPciAddr sets the PCI address of the endpoint. 
+func (endpoint *VhostUserEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + // NetworkPair returns the network pair of the endpoint. func (endpoint *VhostUserEndpoint) NetworkPair() *NetworkInterfacePair { return nil From 6935279bebdc08e73b9d992393d9480376219513 Mon Sep 17 00:00:00 2001 From: Ruidong Cao Date: Wed, 17 Oct 2018 23:02:52 +0800 Subject: [PATCH 2/4] network: add new NetInterworkingModel "none" and endpoint type TapEndpoint This model is for not creating a new net ns for VM and directly creating taps in the host net ns. Signed-off-by: Ruidong Cao --- cli/config/configuration.toml.in | 4 + virtcontainers/endpoint.go | 8 ++ virtcontainers/network.go | 90 ++++++++++----- virtcontainers/qemu.go | 6 + virtcontainers/sandbox.go | 9 +- virtcontainers/tap_endpoint.go | 189 +++++++++++++++++++++++++++++++ 6 files changed, 278 insertions(+), 28 deletions(-) create mode 100644 virtcontainers/tap_endpoint.go diff --git a/cli/config/configuration.toml.in b/cli/config/configuration.toml.in index 5ee4e474e5..0302ec1f89 100644 --- a/cli/config/configuration.toml.in +++ b/cli/config/configuration.toml.in @@ -230,6 +230,10 @@ path = "@NETMONPATH@" # - macvtap # Used when the Container network interface can be bridged using # macvtap. +# +# - none +# Used when customizing the network. Only creates a tap device. No veth pair. +# internetworking_model="@DEFNETWORKMODEL@" # If enabled, the runtime will create opentracing.io traces and spans. diff --git a/virtcontainers/endpoint.go b/virtcontainers/endpoint.go index 60a5913e51..ea08f00deb 100644 --- a/virtcontainers/endpoint.go +++ b/virtcontainers/endpoint.go @@ -44,6 +44,9 @@ const ( // MacvtapEndpointType is macvtap network interface. MacvtapEndpointType EndpointType = "macvtap" + + // TapEndpointType is tap network interface. + TapEndpointType EndpointType = "tap" ) // Set sets an endpoint type based on the input string. 
@@ -64,6 +67,9 @@ func (endpointType *EndpointType) Set(value string) error { case "macvtap": *endpointType = MacvtapEndpointType return nil + case "tap": + *endpointType = TapEndpointType + return nil default: return fmt.Errorf("Unknown endpoint type %s", value) } @@ -82,6 +88,8 @@ func (endpointType *EndpointType) String() string { return string(BridgedMacvlanEndpointType) case MacvtapEndpointType: return string(MacvtapEndpointType) + case TapEndpointType: + return string(TapEndpointType) default: return "" } diff --git a/virtcontainers/network.go b/virtcontainers/network.go index 48eaa254ae..b7368883dd 100644 --- a/virtcontainers/network.go +++ b/virtcontainers/network.go @@ -51,6 +51,9 @@ const ( // This will be used for vethtap, macvtap, ipvtap NetXConnectEnlightenedModel + // NetXConnectNoneModel can be used when the VM is in the host network namespace + NetXConnectNoneModel + // NetXConnectInvalidModel is the last item to check valid values by IsValid() NetXConnectInvalidModel ) @@ -75,6 +78,9 @@ func (n *NetInterworkingModel) SetModel(modelName string) error { case "enlightened": *n = NetXConnectEnlightenedModel return nil + case "none": + *n = NetXConnectNoneModel + return nil } return fmt.Errorf("Unknown type %s", modelName) } @@ -122,7 +128,7 @@ type NetworkInterface struct { Addrs []netlink.Addr } -// TapInterface defines a tap nic. +// TapInterface defines a tap interface type TapInterface struct { ID string Name string @@ -199,27 +205,7 @@ func (n NetworkNamespace) MarshalJSON() ([]byte, error) { return b, err } -// UnmarshalJSON is the custom NetworkNamespace unmarshalling routine. -// This is needed for unmarshalling the Endpoints interfaces array. 
-func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { - var s struct { - NetNsPath string - NetNsCreated bool - Endpoints json.RawMessage - } - - if err := json.Unmarshal(b, &s); err != nil { - return err - } - - (*n).NetNsPath = s.NetNsPath - (*n).NetNsCreated = s.NetNsCreated - - var typedEndpoints []TypedJSONEndpoint - if err := json.Unmarshal([]byte(string(s.Endpoints)), &typedEndpoints); err != nil { - return err - } - +func generateEndpoints(typedEndpoints []TypedJSONEndpoint) ([]Endpoint, error) { var endpoints []Endpoint for _, e := range typedEndpoints { @@ -228,7 +214,7 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { var endpoint PhysicalEndpoint err := json.Unmarshal(e.Data, &endpoint) if err != nil { - return err + return nil, err } endpoints = append(endpoints, &endpoint) @@ -241,7 +227,7 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { var endpoint VethEndpoint err := json.Unmarshal(e.Data, &endpoint) if err != nil { - return err + return nil, err } endpoints = append(endpoints, &endpoint) @@ -254,7 +240,7 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { var endpoint VhostUserEndpoint err := json.Unmarshal(e.Data, &endpoint) if err != nil { - return err + return nil, err } endpoints = append(endpoints, &endpoint) @@ -267,7 +253,7 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { var endpoint BridgedMacvlanEndpoint err := json.Unmarshal(e.Data, &endpoint) if err != nil { - return err + return nil, err } networkLogger().WithFields(logrus.Fields{ @@ -279,7 +265,7 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { var endpoint MacvtapEndpoint err := json.Unmarshal(e.Data, &endpoint) if err != nil { - return err + return nil, err } networkLogger().WithFields(logrus.Fields{ @@ -287,10 +273,50 @@ func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { "endpoint-type": "macvtap", }).Info("endpoint unmarshalled") + case TapEndpointType: + var endpoint TapEndpoint + err := 
json.Unmarshal(e.Data, &endpoint) + if err != nil { + return nil, err + } + + endpoints = append(endpoints, &endpoint) + networkLogger().WithFields(logrus.Fields{ + "endpoint": endpoint, + "endpoint-type": "tap", + }).Info("endpoint unmarshalled") + default: networkLogger().WithField("endpoint-type", e.Type).Error("Ignoring unknown endpoint type") } } + return endpoints, nil +} + +// UnmarshalJSON is the custom NetworkNamespace unmarshalling routine. +// This is needed for unmarshalling the Endpoints interfaces array. +func (n *NetworkNamespace) UnmarshalJSON(b []byte) error { + var s struct { + NetNsPath string + NetNsCreated bool + Endpoints json.RawMessage + } + + if err := json.Unmarshal(b, &s); err != nil { + return err + } + + (*n).NetNsPath = s.NetNsPath + (*n).NetNsCreated = s.NetNsCreated + + var typedEndpoints []TypedJSONEndpoint + if err := json.Unmarshal([]byte(string(s.Endpoints)), &typedEndpoints); err != nil { + return err + } + endpoints, err := generateEndpoints(typedEndpoints) + if err != nil { + return err + } (*n).Endpoints = endpoints return nil @@ -828,6 +854,13 @@ func createNetNS() (string, error) { // into runtime.LockOSThread(), meaning it won't be executed in a // different thread than the one expected by the caller. func doNetNS(netNSPath string, cb func(ns.NetNS) error) error { + // if netNSPath is empty, the callback function will be run in the current network namespace. + // So skip the whole function, just call cb(). cb() needs a NetNS as arg but ignored, give it a fake one. 
+ if netNSPath == "" { + var netNs ns.NetNS + return cb(netNs) + } + runtime.LockOSThread() defer runtime.UnlockOSThread() @@ -1123,6 +1156,9 @@ func createEndpoint(netInfo NetworkInfo, idx int, model NetInterworkingModel) (E } else if netInfo.Iface.Type == "macvtap" { networkLogger().Infof("macvtap interface found") endpoint, err = createMacvtapNetworkEndpoint(netInfo) + } else if netInfo.Iface.Type == "tap" { + networkLogger().Info("tap interface found") + endpoint, err = createTapNetworkEndpoint(idx, netInfo.Iface.Name) } else if netInfo.Iface.Type == "veth" { endpoint, err = createVethNetworkEndpoint(idx, netInfo.Iface.Name, model) } else { diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index e35f6b070f..3c93a4f648 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -872,6 +872,9 @@ func (q *qemu) hotplugNetDevice(endpoint Endpoint, op operation) error { case VethEndpointType: drive := endpoint.(*VethEndpoint) tap = drive.NetPair.TapInterface + case TapEndpointType: + drive := endpoint.(*TapEndpoint) + tap = drive.TapInterface default: return fmt.Errorf("this endpoint is not supported") } @@ -896,6 +899,9 @@ func (q *qemu) hotplugNetDevice(endpoint Endpoint, op operation) error { case VethEndpointType: drive := endpoint.(*VethEndpoint) tap = drive.NetPair.TapInterface + case TapEndpointType: + drive := endpoint.(*TapEndpoint) + tap = drive.TapInterface default: return fmt.Errorf("this endpoint is not supported") } diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index da85cebc71..ac41678b51 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1089,6 +1089,13 @@ func (s *Sandbox) generateNetInfo(inf *grpc.Interface) (NetworkInfo, error) { addrs = append(addrs, *netlinkAddr) } + var ifaceType string + if s.config.NetworkConfig.InterworkingModel == NetXConnectNoneModel { + ifaceType = "tap" + } else { + ifaceType = "veth" + } + return NetworkInfo{ Iface: NetlinkIface{ LinkAttrs: 
netlink.LinkAttrs{ @@ -1096,7 +1103,7 @@ func (s *Sandbox) generateNetInfo(inf *grpc.Interface) (NetworkInfo, error) { HardwareAddr: hw, MTU: int(inf.Mtu), }, - Type: "", + Type: ifaceType, }, Addrs: addrs, }, nil diff --git a/virtcontainers/tap_endpoint.go b/virtcontainers/tap_endpoint.go new file mode 100644 index 0000000000..c3cd3c34cc --- /dev/null +++ b/virtcontainers/tap_endpoint.go @@ -0,0 +1,189 @@ +// Copyright (c) 2018 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + + "github.com/containernetworking/plugins/pkg/ns" + "github.com/vishvananda/netlink" + + "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" +) + +// TapEndpoint represents just a tap endpoint +type TapEndpoint struct { + TapInterface TapInterface + EndpointProperties NetworkInfo + EndpointType EndpointType + PCIAddr string +} + +// Properties returns the properties of the tap interface. +func (endpoint *TapEndpoint) Properties() NetworkInfo { + return endpoint.EndpointProperties +} + +// Name returns name of the tap interface in the network pair. +func (endpoint *TapEndpoint) Name() string { + return endpoint.TapInterface.Name +} + +// HardwareAddr returns the mac address that is assigned to the tap interface +func (endpoint *TapEndpoint) HardwareAddr() string { + return endpoint.TapInterface.TAPIface.HardAddr +} + +// Type identifies the endpoint as a tap endpoint. +func (endpoint *TapEndpoint) Type() EndpointType { + return endpoint.EndpointType +} + +// PciAddr returns the PCI address of the endpoint. +func (endpoint *TapEndpoint) PciAddr() string { + return endpoint.PCIAddr +} + +// SetPciAddr sets the PCI address of the endpoint. +func (endpoint *TapEndpoint) SetPciAddr(pciAddr string) { + endpoint.PCIAddr = pciAddr +} + +// NetworkPair returns the network pair of the endpoint. 
+func (endpoint *TapEndpoint) NetworkPair() *NetworkInterfacePair { + return nil +} + +// SetProperties sets the properties for the endpoint. +func (endpoint *TapEndpoint) SetProperties(properties NetworkInfo) { + endpoint.EndpointProperties = properties +} + +// Attach for tap endpoint adds the tap interface to the hypervisor. +func (endpoint *TapEndpoint) Attach(h hypervisor) error { + return fmt.Errorf("TapEndpoint does not support Attach, if you're using docker please use --net none") +} + +// Detach for the tap endpoint tears down the tap +func (endpoint *TapEndpoint) Detach(netNsCreated bool, netNsPath string) error { + if !netNsCreated && netNsPath != "" { + return nil + } + + networkLogger().WithField("endpoint-type", TapEndpointType).Info("Detaching endpoint") + return doNetNS(netNsPath, func(_ ns.NetNS) error { + return unTapNetwork(endpoint.TapInterface.TAPIface.Name) + }) +} + +// HotAttach for the tap endpoint uses hot plug device +func (endpoint *TapEndpoint) HotAttach(h hypervisor) error { + networkLogger().Info("Hot attaching tap endpoint") + if err := tapNetwork(endpoint, h.hypervisorConfig().NumVCPUs, h.hypervisorConfig().DisableVhostNet); err != nil { + networkLogger().WithError(err).Error("Error bridging tap ep") + return err + } + + if _, err := h.hotplugAddDevice(endpoint, netDev); err != nil { + networkLogger().WithError(err).Error("Error attach tap ep") + return err + } + return nil +} + +// HotDetach for the tap endpoint uses hot pull device +func (endpoint *TapEndpoint) HotDetach(h hypervisor, netNsCreated bool, netNsPath string) error { + networkLogger().Info("Hot detaching tap endpoint") + if err := doNetNS(netNsPath, func(_ ns.NetNS) error { + return unTapNetwork(endpoint.TapInterface.TAPIface.Name) + }); err != nil { + networkLogger().WithError(err).Warn("Error un-bridging tap ep") + } + + if _, err := h.hotplugRemoveDevice(endpoint, netDev); err != nil { + networkLogger().WithError(err).Error("Error detach tap ep") + return err + } + 
return nil +} + +func createTapNetworkEndpoint(idx int, ifName string) (*TapEndpoint, error) { + if idx < 0 { + return &TapEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx) + } + uniqueID := uuid.Generate().String() + + endpoint := &TapEndpoint{ + TapInterface: TapInterface{ + ID: uniqueID, + Name: fmt.Sprintf("eth%d", idx), + TAPIface: NetworkInterface{ + Name: fmt.Sprintf("tap%d_kata", idx), + }, + }, + EndpointType: TapEndpointType, + } + if ifName != "" { + endpoint.TapInterface.Name = ifName + } + + return endpoint, nil +} + +func tapNetwork(endpoint *TapEndpoint, numCPUs uint32, disableVhostNet bool) error { + netHandle, err := netlink.NewHandle() + if err != nil { + return err + } + defer netHandle.Delete() + + tapLink, fds, err := createLink(netHandle, endpoint.TapInterface.TAPIface.Name, &netlink.Tuntap{}, int(numCPUs)) + if err != nil { + return fmt.Errorf("Could not create TAP interface: %s", err) + } + endpoint.TapInterface.VMFds = fds + if !disableVhostNet { + vhostFds, err := createVhostFds(int(numCPUs)) + if err != nil { + return fmt.Errorf("Could not setup vhost fds %s : %s", endpoint.TapInterface.Name, err) + } + endpoint.TapInterface.VhostFds = vhostFds + } + linkAttrs := endpoint.Properties().Iface.LinkAttrs + + // Save the MAC address to the TAP so that it can later be used + // to build the QMP command line. This MAC address has to be + // the one inside the VM in order to avoid any firewall issues. The + // bridge created by the network plugin on the host actually expects + // to see traffic from this MAC address and not another one. 
+ endpoint.TapInterface.TAPIface.HardAddr = linkAttrs.HardwareAddr.String() + if err := netHandle.LinkSetMTU(tapLink, linkAttrs.MTU); err != nil { + return fmt.Errorf("Could not set TAP MTU %d: %s", linkAttrs.MTU, err) + } + if err := netHandle.LinkSetUp(tapLink); err != nil { + return fmt.Errorf("Could not enable TAP %s: %s", endpoint.TapInterface.Name, err) + } + return nil +} + +func unTapNetwork(name string) error { + netHandle, err := netlink.NewHandle() + if err != nil { + return err + } + defer netHandle.Delete() + tapLink, err := getLinkByName(netHandle, name, &netlink.Tuntap{}) + if err != nil { + return fmt.Errorf("Could not get TAP interface: %s", err) + } + if err := netHandle.LinkSetDown(tapLink); err != nil { + return fmt.Errorf("Could not disable TAP %s: %s", name, err) + } + if err := netHandle.LinkDel(tapLink); err != nil { + return fmt.Errorf("Could not remove TAP %s: %s", name, err) + } + return nil +} From 14e5437cae281fec95a79d0419820ca6b73167b4 Mon Sep 17 00:00:00 2001 From: Ruidong Cao Date: Fri, 21 Sep 2018 05:03:57 +0800 Subject: [PATCH 3/4] cli: add configuration option to use or not use host netns If `disable_new_netns` set to true, create VM and shim processes in the host netns Signed-off-by: Ruidong Cao --- cli/config.go | 24 +++++++++++++++++++++--- cli/config/configuration.toml.in | 10 ++++++++++ cli/kata-env.go | 20 +++++++++++--------- cli/network.go | 5 +++++ virtcontainers/default_network.go | 5 ----- virtcontainers/network.go | 1 + virtcontainers/pkg/oci/utils.go | 4 ++++ virtcontainers/sandbox.go | 4 ++++ 8 files changed, 56 insertions(+), 17 deletions(-) diff --git a/cli/config.go b/cli/config.go index bc3215909f..4eb9c64935 100644 --- a/cli/config.go +++ b/cli/config.go @@ -108,6 +108,7 @@ type proxy struct { type runtime struct { Debug bool `toml:"enable_debug"` Tracing bool `toml:"enable_tracing"` + DisableNewNetNs bool `toml:"disable_new_netns"` InterNetworkModel string `toml:"internetworking_model"` } @@ -598,9 +599,7 @@ 
func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat kataLog.Logger.Level = originalLoggerLevel } - if tomlConf.Runtime.Tracing { - tracing = true - } + tracing = tomlConf.Runtime.Tracing if tomlConf.Runtime.InterNetworkModel != "" { err = config.InterNetworkModel.SetModel(tomlConf.Runtime.InterNetworkModel) @@ -626,6 +625,11 @@ func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat return "", config, err } + config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs + if err := checkNetNsConfig(config); err != nil { + return "", config, err + } + // use no proxy if HypervisorConfig.UseVSock is true if config.HypervisorConfig.UseVSock { kataLog.Info("VSOCK supported, configure to not use proxy") @@ -640,6 +644,20 @@ func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat return resolved, config, nil } +// checkNetNsConfig performs sanity checks on disable_new_netns config. +// Because it is an expert option and conflicts with some other common configs. +func checkNetNsConfig(config oci.RuntimeConfig) error { + if config.DisableNewNetNs { + if config.NetmonConfig.Enable { + return fmt.Errorf("config disable_new_netns conflicts with enable_netmon") + } + if config.InterNetworkModel != vc.NetXConnectNoneModel { + return fmt.Errorf("config disable_new_netns only works with 'none' internetworking_model") + } + } + return nil +} + // checkHypervisorConfig performs basic "sanity checks" on the hypervisor // config. func checkHypervisorConfig(config vc.HypervisorConfig) error { diff --git a/cli/config/configuration.toml.in b/cli/config/configuration.toml.in index 0302ec1f89..8bea778af0 100644 --- a/cli/config/configuration.toml.in +++ b/cli/config/configuration.toml.in @@ -240,3 +240,13 @@ internetworking_model="@DEFNETWORKMODEL@" # (See https://www.jaegertracing.io/docs/getting-started). 
# (default: disabled) #enable_tracing = true + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `enable_netmon` +# `disable_new_netns` conflicts with `internetworking_model=bridged` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# If you are using docker, `disable_new_netns` only works with `docker run --net=none` +# (default: false) +#disable_new_netns = true diff --git a/cli/kata-env.go b/cli/kata-env.go index 74d53db307..2f2c38586e 100644 --- a/cli/kata-env.go +++ b/cli/kata-env.go @@ -25,7 +25,7 @@ import ( // // XXX: Increment for every change to the output format // (meaning any change to the EnvInfo type). -const formatVersion = "1.0.18" +const formatVersion = "1.0.19" // MetaInfo stores information on the format of the output itself type MetaInfo struct { @@ -62,10 +62,11 @@ type RuntimeConfigInfo struct { // RuntimeInfo stores runtime details. 
type RuntimeInfo struct { - Version RuntimeVersionInfo - Config RuntimeConfigInfo - Debug bool - Path string + Version RuntimeVersionInfo + Config RuntimeConfigInfo + Debug bool + DisableNewNetNs bool + Path string } // RuntimeVersionInfo stores details of the runtime version @@ -171,10 +172,11 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo { runtimePath, _ := os.Executable() return RuntimeInfo{ - Debug: config.Debug, - Version: runtimeVersion, - Config: runtimeConfig, - Path: runtimePath, + Debug: config.Debug, + Version: runtimeVersion, + Config: runtimeConfig, + Path: runtimePath, + DisableNewNetNs: config.DisableNewNetNs, } } diff --git a/cli/network.go b/cli/network.go index 51597a1174..2c16a581e1 100644 --- a/cli/network.go +++ b/cli/network.go @@ -336,6 +336,11 @@ func hostNetworkingRequested(configNetNs string) (bool, error) { } func setupNetworkNamespace(config *vc.NetworkConfig) error { + if config.DisableNewNetNs { + kataLog.Info("DisableNewNetNs is on, shim and hypervisor are running in the host netns") + return nil + } + if config.NetNSPath == "" { n, err := ns.NewNS() if err != nil { diff --git a/virtcontainers/default_network.go b/virtcontainers/default_network.go index 31494338d3..7e7b8b8af2 100644 --- a/virtcontainers/default_network.go +++ b/virtcontainers/default_network.go @@ -7,7 +7,6 @@ package virtcontainers import ( "context" - "fmt" "github.com/containernetworking/plugins/pkg/ns" opentracing "github.com/opentracing/opentracing-go" @@ -35,10 +34,6 @@ func (n *defNetwork) run(networkNSPath string, cb func() error) error { span, _ := n.trace(context.Background(), "run") defer span.Finish() - if networkNSPath == "" { - return fmt.Errorf("networkNSPath cannot be empty") - } - return doNetNS(networkNSPath, func(_ ns.NetNS) error { return cb() }) diff --git a/virtcontainers/network.go b/virtcontainers/network.go index b7368883dd..e60d8d7fb8 100644 --- a/virtcontainers/network.go +++ b/virtcontainers/network.go @@ 
-148,6 +148,7 @@ type NetworkInterfacePair struct { type NetworkConfig struct { NetNSPath string NetNsCreated bool + DisableNewNetNs bool NetmonConfig NetmonConfig InterworkingModel NetInterworkingModel } diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index 7978be9cd3..706fffce56 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -121,6 +121,9 @@ type RuntimeConfig struct { InterNetworkModel vc.NetInterworkingModel FactoryConfig FactoryConfig Debug bool + + // DisableNewNetNs disables creation of a new netns for the shim and hypervisor processes + DisableNewNetNs bool } // AddKernelParam allows the addition of new kernel parameters to an existing @@ -326,6 +329,7 @@ func networkConfig(ocispec CompatOCISpec, config RuntimeConfig) (vc.NetworkConfi } } netConf.InterworkingModel = config.InterNetworkModel + netConf.DisableNewNetNs = config.DisableNewNetNs netConf.NetmonConfig = vc.NetmonConfig{ Path: config.NetmonConfig.Path, diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index ac41678b51..b8334c1086 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1021,6 +1021,10 @@ func (s *Sandbox) startNetworkMonitor() error { } func (s *Sandbox) createNetwork() error { + if s.config.NetworkConfig.DisableNewNetNs { + return nil + } + span, _ := s.trace("createNetwork") defer span.Finish() From 7a5a57d50f9ff1751c8f24fdec0b9b1eccb44195 Mon Sep 17 00:00:00 2001 From: Ruidong Cao Date: Fri, 21 Sep 2018 05:04:21 +0800 Subject: [PATCH 4/4] cli: test: add unit test for kata-env and kata-check Add unit test for `disable_new_netns` Signed-off-by: Ruidong Cao --- cli/config_test.go | 31 +++++++++++++++++++++++++++---- cli/kata-env_test.go | 7 +++++-- cli/network_test.go | 5 +++++ virtcontainers/network_test.go | 1 + 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/cli/config_test.go b/cli/config_test.go index d3e7c9fbc4..2eb1c161c4 100644 --- a/cli/config_test.go +++ b/cli/config_test.go @@ -42,7 +42,7 
@@ type testRuntimeConfig struct { LogPath string } -func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath, kernelParams, machineType, shimPath, proxyPath, netmonPath, logPath string, disableBlock bool, blockDeviceDriver string, enableIOThreads bool, hotplugVFIOOnRootBus bool) string { +func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath, kernelParams, machineType, shimPath, proxyPath, netmonPath, logPath string, disableBlock bool, blockDeviceDriver string, enableIOThreads bool, hotplugVFIOOnRootBus, disableNewNetNs bool) string { return ` # Runtime configuration file @@ -77,7 +77,8 @@ func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath enable_debug = ` + strconv.FormatBool(netmonDebug) + ` [runtime] - enable_debug = ` + strconv.FormatBool(runtimeDebug) + enable_debug = ` + strconv.FormatBool(runtimeDebug) + ` + disable_new_netns= ` + strconv.FormatBool(disableNewNetNs) } func createConfig(configPath string, fileData string) error { @@ -116,8 +117,9 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf blockDeviceDriver := "virtio-scsi" enableIOThreads := true hotplugVFIOOnRootBus := true + disableNewNetNs := false - runtimeConfigFileData := makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath, kernelParams, machineType, shimPath, proxyPath, netmonPath, logPath, disableBlockDevice, blockDeviceDriver, enableIOThreads, hotplugVFIOOnRootBus) + runtimeConfigFileData := makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath, kernelParams, machineType, shimPath, proxyPath, netmonPath, logPath, disableBlockDevice, blockDeviceDriver, enableIOThreads, hotplugVFIOOnRootBus, disableNewNetNs) configPath := path.Join(dir, "runtime.toml") err = createConfig(configPath, runtimeConfigFileData) @@ -192,7 +194,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf ShimType: defaultShim, 
ShimConfig: shimConfig, - NetmonConfig: netmonConfig, + NetmonConfig: netmonConfig, + DisableNewNetNs: disableNewNetNs, } config = testRuntimeConfig{ @@ -1455,3 +1458,23 @@ func TestCheckHypervisorConfig(t *testing.T) { kataLog.Logger.Out = savedOut } } + +func TestCheckNetNsConfig(t *testing.T) { + assert := assert.New(t) + + config := oci.RuntimeConfig{ + DisableNewNetNs: true, + NetmonConfig: vc.NetmonConfig{ + Enable: true, + }, + } + err := checkNetNsConfig(config) + assert.Error(err) + + config = oci.RuntimeConfig{ + DisableNewNetNs: true, + InterNetworkModel: vc.NetXConnectDefaultModel, + } + err = checkNetNsConfig(config) + assert.Error(err) +} diff --git a/cli/kata-env_test.go b/cli/kata-env_test.go index c465cc96c8..45a2a08f94 100644 --- a/cli/kata-env_test.go +++ b/cli/kata-env_test.go @@ -67,6 +67,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC blockStorageDriver := "virtio-scsi" enableIOThreads := true hotplugVFIOOnRootBus := true + disableNewNetNs := false // override defaultProxyPath = proxyPath @@ -121,6 +122,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC blockStorageDriver, enableIOThreads, hotplugVFIOOnRootBus, + disableNewNetNs, ) configFile = path.Join(prefixDir, "runtime.toml") @@ -293,8 +295,9 @@ func getExpectedRuntimeDetails(config oci.RuntimeConfig, configFile string) Runt Config: RuntimeConfigInfo{ Path: configFile, }, - Path: runtimePath, - Debug: config.Debug, + Path: runtimePath, + Debug: config.Debug, + DisableNewNetNs: config.DisableNewNetNs, } } diff --git a/cli/network_test.go b/cli/network_test.go index 3bf90f0f6a..b8c8337e2a 100644 --- a/cli/network_test.go +++ b/cli/network_test.go @@ -217,4 +217,9 @@ func TestSetupNetworkNamespace(t *testing.T) { n.Close() unix.Unmount(config.NetNSPath, unix.MNT_DETACH) os.RemoveAll(config.NetNSPath) + + // Config with DisableNewNetNs + config = &vc.NetworkConfig{DisableNewNetNs: true} + err = 
setupNetworkNamespace(config) + assert.NoError(err) } diff --git a/virtcontainers/network_test.go b/virtcontainers/network_test.go index 209bfbc30d..6f640b7861 100644 --- a/virtcontainers/network_test.go +++ b/virtcontainers/network_test.go @@ -216,6 +216,7 @@ func TestNetInterworkingModelSetModel(t *testing.T) { {"bridged Model", "bridged", false}, {"macvtap Model", "macvtap", false}, {"enlightened Model", "enlightened", false}, + {"none Model", "none", false}, } for _, tt := range tests {