Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 110 additions & 87 deletions internal/hcsoci/hcsdoc_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (

"github.com/Microsoft/hcsshim/internal/layers"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/oci"
"github.com/Microsoft/hcsshim/internal/processorinfo"
"github.com/Microsoft/hcsshim/internal/schema1"
Expand All @@ -20,6 +19,7 @@ import (
"github.com/Microsoft/hcsshim/internal/uvmfolder"
"github.com/Microsoft/hcsshim/internal/wclayer"
"github.com/Microsoft/hcsshim/osversion"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -85,6 +85,55 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount
return &config, nil
}

// ConvertCPULimits handles the logic of converting and validating the containers CPU limits
// specified in the OCI spec to what HCS expects.
//
// `cid` is the container's ID.
//
// `vmid` is the Utility VM's ID if the container we're constructing is going to belong to
// one.
//
// `spec` is the OCI spec for the container.
//
// `maxCPUCount` is the maximum cpu count allowed for the container. This value should
// be the number of processors on the host, or in the case of a hypervisor isolated container
// the number of processors assigned to the guest/Utility VM.
//
// Returns the cpu count, cpu limit, and cpu weight in this order. Returns an error if more than one of
// cpu count, cpu limit, or cpu weight was specified in the OCI spec as they are mutually
// exclusive.
func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUCount int32) (int32, int32, int32, error) {
cpuNumSet := 0
cpuCount := oci.ParseAnnotationsCPUCount(ctx, spec, oci.AnnotationContainerProcessorCount, 0)
if cpuCount > 0 {
cpuNumSet++
}

cpuLimit := oci.ParseAnnotationsCPULimit(ctx, spec, oci.AnnotationContainerProcessorLimit, 0)
if cpuLimit > 0 {
cpuNumSet++
}

cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, spec, oci.AnnotationContainerProcessorWeight, 0)
if cpuWeight > 0 {
cpuNumSet++
}

if cpuNumSet > 1 {
return 0, 0, 0, fmt.Errorf("invalid spec - Windows Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
} else if cpuNumSet == 1 {
if cpuCount > maxCPUCount {
log.G(ctx).WithFields(logrus.Fields{
"cid": cid,
"requested": cpuCount,
"assigned": maxCPUCount,
}).Warn("Changing user requested CPUCount to current number of processors")
cpuCount = maxCPUCount
}
}
return cpuCount, cpuLimit, cpuWeight, nil
}

// createWindowsContainerDocument creates documents for passing to HCS or GCS to create
// a container, both hosted and process isolated. It creates both v1 and v2
// container objects, WCOW only. The containers storage should have been mounted already.
Expand Down Expand Up @@ -122,100 +171,74 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname}
}

// CPU Resources
cpuNumSet := 0
cpuCount := oci.ParseAnnotationsCPUCount(ctx, coi.Spec, oci.AnnotationContainerProcessorCount, 0)
if cpuCount > 0 {
cpuNumSet++
}
var (
uvmCPUCount int32
hostCPUCount = processorinfo.ProcessorCount()
maxCPUCount = hostCPUCount
)

cpuLimit := oci.ParseAnnotationsCPULimit(ctx, coi.Spec, oci.AnnotationContainerProcessorLimit, 0)
if cpuLimit > 0 {
cpuNumSet++
if coi.HostingSystem != nil {
uvmCPUCount = coi.HostingSystem.ProcessorCount()
maxCPUCount = uvmCPUCount
}

cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, coi.Spec, oci.AnnotationContainerProcessorWeight, 0)
if cpuWeight > 0 {
cpuNumSet++
cpuCount, cpuLimit, cpuWeight, err := ConvertCPULimits(ctx, coi.ID, coi.Spec, maxCPUCount)
if err != nil {
return nil, nil, err
}

if cpuNumSet > 1 {
return nil, nil, fmt.Errorf("invalid spec - Windows Process Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
} else if cpuNumSet == 1 {
hostCPUCount := processorinfo.ProcessorCount()
// usableCPUCount is the number of processors present in whatever environment
// the container is running in. It will be either the processor count of the
// host, or the UVM, based on if the container is process or hypervisor isolated.
usableCPUCount := hostCPUCount
var uvmCPUCount int32
if coi.HostingSystem != nil {
uvmCPUCount = coi.HostingSystem.ProcessorCount()
usableCPUCount = uvmCPUCount
}
if cpuCount > usableCPUCount {
l := log.G(ctx).WithField(logfields.ContainerID, coi.ID)
if coi.HostingSystem != nil {
l.Data[logfields.UVMID] = coi.HostingSystem.ID()
}
l.WithFields(logrus.Fields{
"requested": cpuCount,
"assigned": usableCPUCount,
}).Warn("Changing user requested CPUCount to current number of processors")
cpuCount = usableCPUCount
}
if coi.ScaleCPULimitsToSandbox && cpuLimit > 0 && coi.HostingSystem != nil {
// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
// the CPU limit has been calculated based on the number of processors on the
// host, and instead re-calculate it based on the number of processors in the UVM.
//
// This is needed to work correctly with assumptions kubelet makes when computing
// the CPU limit value:
// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
// cores. So if 2000 millicores are assigned, the container can use 2 processors.
// - In Windows, the job object CPU limit is global across all processors on the
// system, and is represented as a fraction out of 10000. In this model, a limit
// of 10000 means the container can use all processors fully, regardless of how
// many processors exist on the system.
// - To convert the millicores value into the job object limit, kubelet divides
// the millicores by the number of CPU cores on the host. This causes problems
// when running inside a UVM, as the UVM may have a different number of processors
// than the host system.
//
// To work around this, we undo the division by the number of host processors, and
// re-do the division based on the number of processors inside the UVM. This will
// give the correct value based on the actual number of millicores that the kubelet
// wants the container to have.
//
// Kubelet formula to compute CPU limit:
// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
// We only apply bounds here because we are calculating the CPU limit ourselves,
// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
// In the case where we use the value directly from the user, we don't alter it to fit
// within the bounds, but just let the platform throw an error if it is invalid.
if newCPULimit < 1 {
newCPULimit = 1
} else if newCPULimit > 10000 {
newCPULimit = 10000
}
log.G(ctx).WithFields(logrus.Fields{
"hostCPUCount": hostCPUCount,
"uvmCPUCount": uvmCPUCount,
"oldCPULimit": cpuLimit,
"newCPULimit": newCPULimit,
}).Info("rescaling CPU limit for UVM sandbox")
cpuLimit = newCPULimit
if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 {
// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
// the CPU limit has been calculated based on the number of processors on the
// host, and instead re-calculate it based on the number of processors in the UVM.
//
// This is needed to work correctly with assumptions kubelet makes when computing
// the CPU limit value:
// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
// cores. So if 2000 millicores are assigned, the container can use 2 processors.
// - In Windows, the job object CPU limit is global across all processors on the
// system, and is represented as a fraction out of 10000. In this model, a limit
// of 10000 means the container can use all processors fully, regardless of how
// many processors exist on the system.
// - To convert the millicores value into the job object limit, kubelet divides
// the millicores by the number of CPU cores on the host. This causes problems
// when running inside a UVM, as the UVM may have a different number of processors
// than the host system.
//
// To work around this, we undo the division by the number of host processors, and
// re-do the division based on the number of processors inside the UVM. This will
// give the correct value based on the actual number of millicores that the kubelet
// wants the container to have.
//
// Kubelet formula to compute CPU limit:
// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
// We only apply bounds here because we are calculating the CPU limit ourselves,
// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
// In the case where we use the value directly from the user, we don't alter it to fit
// within the bounds, but just let the platform throw an error if it is invalid.
if newCPULimit < 1 {
newCPULimit = 1
} else if newCPULimit > 10000 {
newCPULimit = 10000
}
log.G(ctx).WithFields(logrus.Fields{
"hostCPUCount": hostCPUCount,
"uvmCPUCount": uvmCPUCount,
"oldCPULimit": cpuLimit,
"newCPULimit": newCPULimit,
}).Info("rescaling CPU limit for UVM sandbox")
cpuLimit = newCPULimit
}

v1.ProcessorCount = uint32(cpuCount)
v1.ProcessorMaximum = int64(cpuLimit)
v1.ProcessorWeight = uint64(cpuWeight)
v1.ProcessorCount = uint32(cpuCount)
v1.ProcessorMaximum = int64(cpuLimit)
v1.ProcessorWeight = uint64(cpuWeight)

v2Container.Processor = &hcsschema.Processor{
Count: cpuCount,
Maximum: cpuLimit,
Weight: cpuWeight,
}
v2Container.Processor = &hcsschema.Processor{
Count: cpuCount,
Maximum: cpuLimit,
Weight: cpuWeight,
}

// Memory Resources
Expand Down
28 changes: 28 additions & 0 deletions internal/jobcontainers/cpurate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package jobcontainers

import (
"testing"
)

func assertEqual(t *testing.T, a uint32, b uint32) {
if a != b {
t.Fatalf("%d != %d", a, b)
}
}

func TestJobCPURate(t *testing.T) {
rate := calculateJobCPURate(10, 1)
assertEqual(t, rate, 1000)

rate = calculateJobCPURate(10, 5)
assertEqual(t, rate, 5000)

rate = calculateJobCPURate(20, 5)
assertEqual(t, rate, 2500)

rate = calculateJobCPURate(1, 1)
assertEqual(t, rate, 10000)

rate = calculateJobCPURate(1, 0)
assertEqual(t, rate, 1)
}
46 changes: 46 additions & 0 deletions internal/jobcontainers/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package jobcontainers

import (
"unicode/utf16"
"unsafe"

"github.com/pkg/errors"
"golang.org/x/sys/windows"
)

// defaultEnvBlock will return a new environment block in the context of the user token
// `token`.
//
// This is almost a direct copy of the go stdlib implementation with some slight changes
// to force a valid token to be passed.
// https://github.com/golang/go/blob/f21be2fdc6f1becdbed1592ea0b245cdeedc5ac8/src/internal/syscall/execenv/execenv_windows.go#L24
func defaultEnvBlock(token windows.Token) (env []string, err error) {
if token == 0 {
return nil, errors.New("invalid token for creating environment block")
}

var block *uint16
if err := windows.CreateEnvironmentBlock(&block, token, false); err != nil {
return nil, err
}
defer windows.DestroyEnvironmentBlock(block)

blockp := uintptr(unsafe.Pointer(block))
for {
// find NUL terminator
end := unsafe.Pointer(blockp)
for *(*uint16)(end) != 0 {
end = unsafe.Pointer(uintptr(end) + 2)
}

n := (uintptr(end) - uintptr(unsafe.Pointer(blockp))) / 2
if n == 0 {
// environment block ends with empty string
break
}
entry := (*[(1 << 30) - 1]uint16)(unsafe.Pointer(blockp))[:n:n]
env = append(env, string(utf16.Decode(entry)))
blockp += 2 * (uintptr(len(entry)) + 1)
}
return
}
Loading