diff --git a/cmd/gcs/main.go b/cmd/gcs/main.go index 74676671a6..7dacf5d5b8 100644 --- a/cmd/gcs/main.go +++ b/cmd/gcs/main.go @@ -84,6 +84,7 @@ func readMemoryEvents(startTime time.Time, efdFile *os.File, cgName string, thre func main() { startTime := time.Now() logLevel := flag.String("loglevel", "debug", "Logging Level: debug, info, warning, error, fatal, panic.") + coreDumpLoc := flag.String("core-dump-location", "", "The location/format where process core dumps will be written to.") kmsgLogLevel := flag.Uint("kmsgLogLevel", uint(kmsg.Warning), "Log all kmsg entries with a priority less than or equal to the supplied level.") logFile := flag.String("logfile", "", "Logging Target: An optional file name/path. Omit for console output.") logFormat := flag.String("log-format", "text", "Logging Format: text or json") @@ -144,6 +145,19 @@ func main() { logrus.Info("GCS started") + // Set the process core dump location. This will be global to all containers as it's a kernel configuration. + // If no path is specified core dumps will just be placed in the working directory of wherever the process + // was invoked to a file named "core". + if *coreDumpLoc != "" { + if err := ioutil.WriteFile( + "/proc/sys/kernel/core_pattern", + []byte(*coreDumpLoc), + 0644, + ); err != nil { + logrus.WithError(err).Fatal("failed to set core dump location") + } + } + // Continuously log /dev/kmsg go kmsg.ReadForever(kmsg.LogLevel(*kmsgLogLevel)) diff --git a/internal/guest/runtime/hcsv2/container.go b/internal/guest/runtime/hcsv2/container.go index 61ff9c7ac5..c2d234fd3a 100644 --- a/internal/guest/runtime/hcsv2/container.go +++ b/internal/guest/runtime/hcsv2/container.go @@ -66,6 +66,9 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe return -1, err } + // Add in the core rlimit specified on the container in case there was one set. This makes it so that execed processes can also generate + // core dumps. 
+ process.Rlimits = c.spec.Process.Rlimits p, err := c.container.ExecProcess(process, stdioSet) if err != nil { stdioSet.Close() diff --git a/internal/guest/runtime/hcsv2/sandbox_container.go b/internal/guest/runtime/hcsv2/sandbox_container.go index a5d80aeeb8..516ef8a0bc 100644 --- a/internal/guest/runtime/hcsv2/sandbox_container.go +++ b/internal/guest/runtime/hcsv2/sandbox_container.go @@ -108,6 +108,12 @@ func setupSandboxContainerSpec(ctx context.Context, id string, spec *oci.Spec) ( } } + if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" { + if err := setCoreRLimit(spec, rlimCore); err != nil { + return err + } + } + // TODO: JTERRY75 /dev/shm is not properly setup for LCOW I believe. CRI // also has a concept of a sandbox/shm file when the IPC NamespaceMode != // NODE. diff --git a/internal/guest/runtime/hcsv2/spec.go b/internal/guest/runtime/hcsv2/spec.go index 6abcd509d2..02168f6874 100644 --- a/internal/guest/runtime/hcsv2/spec.go +++ b/internal/guest/runtime/hcsv2/spec.go @@ -63,6 +63,32 @@ func setProcess(spec *oci.Spec) { } } +func setCoreRLimit(spec *oci.Spec, value string) error { + setProcess(spec) + + vals := strings.Split(value, ";") + if len(vals) != 2 { + return errors.New("wrong number of values supplied for rlimit core") + } + + soft, err := strconv.ParseUint(vals[0], 10, 64) + if err != nil { + return errors.Wrap(err, "failed to parse soft core rlimit") + } + hard, err := strconv.ParseUint(vals[1], 10, 64) + if err != nil { + return errors.Wrap(err, "failed to parse hard core rlimit") + } + + spec.Process.Rlimits = append(spec.Process.Rlimits, oci.POSIXRlimit{ + Type: "RLIMIT_CORE", + Soft: soft, + Hard: hard, + }) + + return nil +} + // setUserStr sets `spec.Process` to the valid `userstr` based on the OCI Image Spec // v1.0.0 `userstr`. 
// diff --git a/internal/guest/runtime/hcsv2/workload_container.go b/internal/guest/runtime/hcsv2/workload_container.go index 1e83f6a7f3..2148384316 100644 --- a/internal/guest/runtime/hcsv2/workload_container.go +++ b/internal/guest/runtime/hcsv2/workload_container.go @@ -161,6 +161,12 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci. return err } + if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" { + if err := setCoreRLimit(spec, rlimCore); err != nil { + return err + } + } + // Force the parent cgroup into our /containers root spec.Linux.CgroupsPath = "/containers/" + id diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 0482b07773..54c4be1eaf 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -4,6 +4,7 @@ package hcsoci import ( "context" + "errors" "fmt" "path/filepath" "regexp" @@ -385,6 +386,51 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } v2Container.AdditionalDeviceNamespace = extensions + // Process dump setup (if requested) + dumpPath := "" + if coi.HostingSystem != nil { + dumpPath = coi.HostingSystem.ProcessDumpLocation() + } + + if specDumpPath, ok := coi.Spec.Annotations[oci.AnnotationContainerProcessDumpLocation]; ok { + // If a process dump path was specified at pod creation time for a hypervisor isolated pod, then + // use this value. If one was specified on the container creation document then override with this + // instead. Unlike Linux, Windows containers can set the dump path on a per container basis. + dumpPath = specDumpPath + } + + if dumpPath != "" { + dumpType, err := parseDumpType(coi.Spec.Annotations) + if err != nil { + return nil, nil, err + } + + // Setup WER registry keys for local process dump creation if specified. 
+ // https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + v2Container.RegistryChanges = &hcsschema.RegistryChanges{ + AddValues: []hcsschema.RegistryValue{ + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpFolder", + StringValue: dumpPath, + Type_: "String", + }, + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpType", + DWordValue: dumpType, + Type_: "DWord", + }, + }, + } + } + return v1, v2Container, nil } @@ -415,3 +461,22 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h v2.AssignedDevices = v2AssignedDevices return nil } + +// parseDumpType parses the passed in string representation of the local user mode process dump type to the +// corresponding value the registry expects to be set. +// +// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings +func parseDumpType(annotations map[string]string) (int32, error) { + dmpTypeStr := annotations[oci.AnnotationWCOWProcessDumpType] + switch dmpTypeStr { + case "": + // If no type specified, default to full dumps. + return 2, nil + case "mini": + return 1, nil + case "full": + return 2, nil + default: + return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`) + } +} diff --git a/internal/oci/annotations.go b/internal/oci/annotations.go index 5c28ea02c6..bed5230470 100644 --- a/internal/oci/annotations.go +++ b/internal/oci/annotations.go @@ -221,4 +221,17 @@ const ( // AnnotationSecurityPolicy is used to specify a security policy for opengcs to enforce AnnotationSecurityPolicy = "io.microsoft.virtualmachine.lcow.securitypolicy" + // AnnotationContainerProcessDumpLocation specifies a path inside of containers to save process dumps to. 
As + // the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of + // some kind (vhd, bind mount, fileshare mount etc.) + AnnotationContainerProcessDumpLocation = "io.microsoft.container.processdumplocation" + + // AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode + // process dump for Windows containers. The supported options are "mini", and "full". + // See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype" + + // AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set + // in order to have core dumps generated for a given container. + AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore" ) diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 264f6a1a6d..c77a90731a 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -332,7 +332,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.EnableScratchEncryption = parseAnnotationsBool(ctx, s.Annotations, AnnotationEncryptedScratchDisk, lopts.EnableScratchEncryption) lopts.SecurityPolicy = parseAnnotationsString(s.Annotations, AnnotationSecurityPolicy, lopts.SecurityPolicy) lopts.KernelBootOptions = parseAnnotationsString(s.Annotations, AnnotationKernelBootOptions, lopts.KernelBootOptions) - + lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, lopts.ProcessDumpLocation) handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts) handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts) @@ -357,6 +357,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID) wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, 
AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy) wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap) + wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, wopts.ProcessDumpLocation) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil { return nil, err diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 994f96beac..e77a08f3a1 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -85,6 +85,11 @@ type Options struct { // that receives the UVMs set of NICs from this proxy instead of enumerating // the endpoints locally. NetworkConfigProxy string + + // Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be + // applied to all containers. On Windows it's configurable per container, but we can mimic this for + // Windows by just applying the location specified here per container. + ProcessDumpLocation string } // compares the create opts used during template creation with the create opts @@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool { return uvm.physicallyBacked } +// ProcessDumpLocation returns the location that process dumps will get written to for containers running +// in the UVM. 
+func (uvm *UtilityVM) ProcessDumpLocation() string { + return uvm.processDumpLocation +} + func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 { actual := (requested + 1) &^ 1 // align up to an even number if requested != actual { diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index db55674547..2a5e8ad240 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -384,6 +384,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error initArgs += " " + opts.ExecCommandLine + if opts.ProcessDumpLocation != "" { + initArgs += " -core-dump-location " + opts.ProcessDumpLocation + } + if vmDebugging { // Launch a shell on the console. initArgs = `sh -c "` + initArgs + ` & exec sh"` diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index c21fc5d1e4..ee70a67f9b 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv }, } + var registryChanges hcsschema.RegistryChanges + // We're getting asked to setup local dump collection for WCOW. We need to: + // + // 1. Turn off WER reporting, so we don't both upload the dump and save a local copy. + // 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions. + // https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192 + // This supposedly should be fixed soon but for now keep this until we know which container images + // (1809, 1903/9, 2004 etc.) this went out too. 
+ if opts.ProcessDumpLocation != "" { + uvm.processDumpLocation = opts.ProcessDumpLocation + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "ControlSet001\\Services\\WerSvc", + }, + Name: "Start", + DWordValue: 2, + Type_: "DWord", + }, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting", + }, + Name: "Disabled", + DWordValue: 1, + Type_: "DWord", + }, + ) + } + // Here for a temporary workaround until the need for setting this regkey is no more. To protect // against any undesired behavior (such as some general networking scenarios ceasing to function) // with a recent change to fix SMB share access in the UVM, this registry key will be checked to // enable the change in question inside GNS.dll. - var registryChanges hcsschema.RegistryChanges if !opts.DisableCompartmentNamespace { - registryChanges = hcsschema.RegistryChanges{ - AddValues: []hcsschema.RegistryValue{ - { - Key: &hcsschema.RegistryKey{ - Hive: "System", - Name: "CurrentControlSet\\Services\\gns", - }, - Name: "EnableCompartmentNamespace", - DWordValue: 1, - Type_: "DWord", + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "CurrentControlSet\\Services\\gns", }, + Name: "EnableCompartmentNamespace", + DWordValue: 1, + Type_: "DWord", }, - } + ) } processor := &hcsschema.Processor2{ diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 904fd112aa..5d7e528d87 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -122,9 +122,13 @@ type UtilityVM struct { // is true TemplateID string + // Location that container process dumps will get written too. + processDumpLocation string + // The CreateOpts used to create this uvm. 
These can be either of type // uvm.OptionsLCOW or uvm.OptionsWCOW createOpts interface{} + // Network config proxy client. If nil then this wasn't requested and the // uvms network will be configured locally. ncProxyClient ncproxyttrpc.NetworkConfigProxyService diff --git a/test/cri-containerd/main.go b/test/cri-containerd/main.go index d723f52d49..015d6a3337 100644 --- a/test/cri-containerd/main.go +++ b/test/cri-containerd/main.go @@ -48,6 +48,8 @@ const ( lcowRuntimeHandler = "runhcs-lcow" imageLcowK8sPause = "k8s.gcr.io/pause:3.1" imageLcowAlpine = "docker.io/library/alpine:latest" + imageLcowAlpineCoreDump = "cplatpublic.azurecr.io/stackoverflow-alpine:latest" + imageWindowsProcessDump = "cplatpublic.azurecr.io/crashdump:latest" imageLcowCosmos = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce" imageJobContainerHNS = "cplatpublic.azurecr.io/jobcontainer_hns:latest" imageJobContainerETW = "cplatpublic.azurecr.io/jobcontainer_etw:latest" @@ -162,7 +164,7 @@ func getWindowsNanoserverImage(build uint16) string { case osversion.V20H2: return "mcr.microsoft.com/windows/nanoserver:2009" default: - panic("unsupported build") + return "mcr.microsoft.com/windows/nanoserver:2009" } } @@ -179,7 +181,7 @@ func getWindowsServerCoreImage(build uint16) string { case osversion.V20H2: return "mcr.microsoft.com/windows/servercore:2009" default: - panic("unsupported build") + return "mcr.microsoft.com/windows/servercore:2009" } } diff --git a/test/cri-containerd/runpodsandbox_test.go b/test/cri-containerd/runpodsandbox_test.go index fec2650778..05a06c3ec9 100644 --- a/test/cri-containerd/runpodsandbox_test.go +++ b/test/cri-containerd/runpodsandbox_test.go @@ -1361,6 +1361,226 @@ func Test_RunPodSandbox_MultipleContainersSameVhd_WCOW(t *testing.T) { } } +func Test_RunPodSandbox_ProcessDump_LCOW(t *testing.T) { + requireFeatures(t, featureLCOW) + + pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpineCoreDump}) + + client := newTestRuntimeClient(t) + ctx, cancel 
:= context.WithCancel(context.Background()) + defer cancel() + + sbRequest := getRunPodSandboxRequest(t, lcowRuntimeHandler, map[string]string{ + oci.AnnotationContainerProcessDumpLocation: "/coredumps/core", + }) + + podID := runPodSandbox(t, client, ctx, sbRequest) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) + + mounts := []*runtime.Mount{ + { + HostPath: "sandbox:///coredump", + ContainerPath: "/coredumps", + }, + } + + annotations := map[string]string{ + oci.AnnotationRLimitCore: "18446744073709551615;18446744073709551615", + } + + // Setup container 1 that uses an image that stackoverflows shortly after starting. + // This should generate a core dump file in the sandbox mount location + c1Request := &runtime.CreateContainerRequest{ + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: t.Name() + "-Container1", + }, + Image: &runtime.ImageSpec{ + Image: imageLcowAlpineCoreDump, + }, + Command: []string{ + "./stackoverflow", + }, + Annotations: annotations, + Mounts: mounts, + }, + PodSandboxId: podID, + SandboxConfig: sbRequest.Config, + } + + container1ID := createContainer(t, client, ctx, c1Request) + defer removeContainer(t, client, ctx, container1ID) + + startContainer(t, client, ctx, container1ID) + defer stopContainer(t, client, ctx, container1ID) + + // Then setup a secondary container that will mount the same sandbox mount and + // just verify that the core dump file is present. 
+ c2Request := &runtime.CreateContainerRequest{ + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: t.Name() + "-Container2", + }, + Image: &runtime.ImageSpec{ + Image: imageLcowAlpineCoreDump, + }, + // Hold this command open until killed + Command: []string{ + "top", + }, + Mounts: mounts, + }, + PodSandboxId: podID, + SandboxConfig: sbRequest.Config, + } + + mounts = []*runtime.Mount{ + { + HostPath: "sandbox:///coredump", + ContainerPath: "/coredumps", + }, + } + + // Wait for the first container to die and create the core dump. + time.Sleep(time.Second * 5) + + container2ID := createContainer(t, client, ctx, c2Request) + defer removeContainer(t, client, ctx, container2ID) + + startContainer(t, client, ctx, container2ID) + defer stopContainer(t, client, ctx, container2ID) + + // Check if the core dump file is present + execCommand := []string{ + "ls", + "/coredumps/core", + } + execRequest := &runtime.ExecSyncRequest{ + ContainerId: container2ID, + Cmd: execCommand, + Timeout: 20, + } + + r := execSync(t, client, ctx, execRequest) + if r.ExitCode != 0 { + t.Fatalf("failed with exit code %d running `ls`: %s", r.ExitCode, string(r.Stderr)) + } +} + +func Test_RunPodSandbox_ProcessDump_WCOW_Hypervisor(t *testing.T) { + requireFeatures(t, featureWCOWHypervisor) + + pullRequiredImages(t, []string{imageWindowsProcessDump}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sbRequest := getRunPodSandboxRequest(t, wcowHypervisor19041RuntimeHandler, map[string]string{ + oci.AnnotationContainerProcessDumpLocation: "C:\\processdump", + }) + + podID := runPodSandbox(t, client, ctx, sbRequest) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) + + mounts := []*runtime.Mount{ + { + HostPath: "sandbox:///processdump", + ContainerPath: "C:\\processdump", + }, + } + + // Setup container 1 that uses an image that throws a user exception 
shortly after starting. + // This should generate a process dump file in the sandbox mount location + c1Request := &runtime.CreateContainerRequest{ + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: t.Name() + "-Container1", + }, + Image: &runtime.ImageSpec{ + Image: imageWindowsProcessDump, + }, + Command: []string{ + "C:\\app\\crashtest.exe", + "ue", + }, + Mounts: mounts, + }, + PodSandboxId: podID, + SandboxConfig: sbRequest.Config, + } + + container1ID := createContainer(t, client, ctx, c1Request) + defer removeContainer(t, client, ctx, container1ID) + + startContainer(t, client, ctx, container1ID) + defer stopContainer(t, client, ctx, container1ID) + + // Then setup a secondary container that will mount the same sandbox mount and + // just verify that the process dump file is present. + c2Request := &runtime.CreateContainerRequest{ + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: t.Name() + "-Container2", + }, + Image: &runtime.ImageSpec{ + Image: imageWindowsProcessDump, + }, + // Hold this command open until killed + Command: []string{ + "cmd", + "/c", + "ping", + "-t", + "127.0.0.1", + }, + Mounts: mounts, + }, + PodSandboxId: podID, + SandboxConfig: sbRequest.Config, + } + + mounts = []*runtime.Mount{ + { + HostPath: "sandbox:///processdump", + ContainerPath: "C:\\processdump", + }, + } + + // Wait for the first container to die and create the process dump. 
+ time.Sleep(time.Second * 10) + + container2ID := createContainer(t, client, ctx, c2Request) + defer removeContainer(t, client, ctx, container2ID) + + startContainer(t, client, ctx, container2ID) + defer stopContainer(t, client, ctx, container2ID) + + // Check if the process dump file is present + execCommand := []string{ + "cmd", + "/c", + "dir", + "C:\\processdump", + } + execRequest := &runtime.ExecSyncRequest{ + ContainerId: container2ID, + Cmd: execCommand, + Timeout: 20, + } + + r := execSync(t, client, ctx, execRequest) + if r.ExitCode != 0 { + t.Fatalf("failed with exit code %d running `dir`: %s", r.ExitCode, string(r.Stderr)) + } + + if !strings.Contains(string(r.Stdout), ".dmp") { + t.Fatalf("expected dmp file to be present in the directory, got: %s", string(r.Stdout)) + } +} + func createSandboxContainerAndExecForCustomScratch(t *testing.T, annotations map[string]string) (string, string, int) { cmd := []string{ "df", diff --git a/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile b/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile new file mode 100644 index 0000000000..d750f3ad33 --- /dev/null +++ b/test/cri-containerd/test-images/stackoverflow-lcow/Dockerfile @@ -0,0 +1,7 @@ +FROM alpine + +RUN apk add --no-cache build-base +WORKDIR /app +COPY main.c . 
+ +RUN gcc -g -o stackoverflow main.c \ No newline at end of file diff --git a/test/cri-containerd/test-images/stackoverflow-lcow/main.c b/test/cri-containerd/test-images/stackoverflow-lcow/main.c new file mode 100644 index 0000000000..bee9154c1e --- /dev/null +++ b/test/cri-containerd/test-images/stackoverflow-lcow/main.c @@ -0,0 +1,10 @@ +#include <stdio.h> + +void foo(){ + foo(); +} + +int main() { + foo(); + return 0; +} \ No newline at end of file diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go index 0482b07773..54c4be1eaf 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcsoci/hcsdoc_wcow.go @@ -4,6 +4,7 @@ package hcsoci import ( "context" + "errors" "fmt" "path/filepath" "regexp" @@ -385,6 +386,51 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } v2Container.AdditionalDeviceNamespace = extensions + // Process dump setup (if requested) + dumpPath := "" + if coi.HostingSystem != nil { + dumpPath = coi.HostingSystem.ProcessDumpLocation() + } + + if specDumpPath, ok := coi.Spec.Annotations[oci.AnnotationContainerProcessDumpLocation]; ok { + // If a process dump path was specified at pod creation time for a hypervisor isolated pod, then + // use this value. If one was specified on the container creation document then override with this + // instead. Unlike Linux, Windows containers can set the dump path on a per container basis. + dumpPath = specDumpPath + } + + if dumpPath != "" { + dumpType, err := parseDumpType(coi.Spec.Annotations) + if err != nil { + return nil, nil, err + } + + // Setup WER registry keys for local process dump creation if specified. 
+ // https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + v2Container.RegistryChanges = &hcsschema.RegistryChanges{ + AddValues: []hcsschema.RegistryValue{ + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpFolder", + StringValue: dumpPath, + Type_: "String", + }, + { + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps", + }, + Name: "DumpType", + DWordValue: dumpType, + Type_: "DWord", + }, + }, + } + } + return v1, v2Container, nil } @@ -415,3 +461,22 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h v2.AssignedDevices = v2AssignedDevices return nil } + +// parseDumpType parses the passed in string representation of the local user mode process dump type to the +// corresponding value the registry expects to be set. +// +// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings +func parseDumpType(annotations map[string]string) (int32, error) { + dmpTypeStr := annotations[oci.AnnotationWCOWProcessDumpType] + switch dmpTypeStr { + case "": + // If no type specified, default to full dumps. 
+ return 2, nil + case "mini": + return 1, nil + case "full": + return 2, nil + default: + return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`) + } +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go index 5c28ea02c6..bed5230470 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/annotations.go @@ -221,4 +221,17 @@ const ( // AnnotationSecurityPolicy is used to specify a security policy for opengcs to enforce AnnotationSecurityPolicy = "io.microsoft.virtualmachine.lcow.securitypolicy" + // AnnotationContainerProcessDumpLocation specifies a path inside of containers to save process dumps to. As + // the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of + // some kind (vhd, bind mount, fileshare mount etc.) + AnnotationContainerProcessDumpLocation = "io.microsoft.container.processdumplocation" + + // AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode + // process dump for Windows containers. The supported options are "mini", and "full". + // See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps + AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype" + + // AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set + // in order to have core dumps generated for a given container. 
+ AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore" ) diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go index 264f6a1a6d..c77a90731a 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/oci/uvm.go @@ -332,7 +332,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.EnableScratchEncryption = parseAnnotationsBool(ctx, s.Annotations, AnnotationEncryptedScratchDisk, lopts.EnableScratchEncryption) lopts.SecurityPolicy = parseAnnotationsString(s.Annotations, AnnotationSecurityPolicy, lopts.SecurityPolicy) lopts.KernelBootOptions = parseAnnotationsString(s.Annotations, AnnotationKernelBootOptions, lopts.KernelBootOptions) - + lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, lopts.ProcessDumpLocation) handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts) handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts) @@ -357,6 +357,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID) wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy) wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap) + wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, wopts.ProcessDumpLocation) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil { return nil, err diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go index 994f96beac..e77a08f3a1 100644 --- 
a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go @@ -85,6 +85,11 @@ type Options struct { // that receives the UVMs set of NICs from this proxy instead of enumerating // the endpoints locally. NetworkConfigProxy string + + // Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be + // applied to all containers. On Windows it's configurable per container, but we can mimic this for + // Windows by just applying the location specified here per container. + ProcessDumpLocation string } // compares the create opts used during template creation with the create opts @@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool { return uvm.physicallyBacked } +// ProcessDumpLocation returns the location that process dumps will get written to for containers running +// in the UVM. +func (uvm *UtilityVM) ProcessDumpLocation() string { + return uvm.processDumpLocation +} + func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 { actual := (requested + 1) &^ 1 // align up to an even number if requested != actual { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go index db55674547..2a5e8ad240 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go @@ -384,6 +384,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error initArgs += " " + opts.ExecCommandLine + if opts.ProcessDumpLocation != "" { + initArgs += " -core-dump-location " + opts.ProcessDumpLocation + } + if vmDebugging { // Launch a shell on the console. 
initArgs = `sh -c "` + initArgs + ` & exec sh"` diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go index c21fc5d1e4..ee70a67f9b 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go @@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv }, } + var registryChanges hcsschema.RegistryChanges + // We're getting asked to setup local dump collection for WCOW. We need to: + // + // 1. Turn off WER reporting, so we don't both upload the dump and save a local copy. + // 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions. + // https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192 + // This supposedly should be fixed soon but for now keep this until we know which container images + // (1809, 1903/9, 2004 etc.) this went out too. + if opts.ProcessDumpLocation != "" { + uvm.processDumpLocation = opts.ProcessDumpLocation + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "ControlSet001\\Services\\WerSvc", + }, + Name: "Start", + DWordValue: 2, + Type_: "DWord", + }, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "Software", + Name: "Microsoft\\Windows\\Windows Error Reporting", + }, + Name: "Disabled", + DWordValue: 1, + Type_: "DWord", + }, + ) + } + // Here for a temporary workaround until the need for setting this regkey is no more. To protect // against any undesired behavior (such as some general networking scenarios ceasing to function) // with a recent change to fix SMB share access in the UVM, this registry key will be checked to // enable the change in question inside GNS.dll. 
- var registryChanges hcsschema.RegistryChanges if !opts.DisableCompartmentNamespace { - registryChanges = hcsschema.RegistryChanges{ - AddValues: []hcsschema.RegistryValue{ - { - Key: &hcsschema.RegistryKey{ - Hive: "System", - Name: "CurrentControlSet\\Services\\gns", - }, - Name: "EnableCompartmentNamespace", - DWordValue: 1, - Type_: "DWord", + registryChanges.AddValues = append(registryChanges.AddValues, + hcsschema.RegistryValue{ + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "CurrentControlSet\\Services\\gns", }, + Name: "EnableCompartmentNamespace", + DWordValue: 1, + Type_: "DWord", }, - } + ) } processor := &hcsschema.Processor2{ diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go index 904fd112aa..5d7e528d87 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go @@ -122,9 +122,13 @@ type UtilityVM struct { // is true TemplateID string + // Location that container process dumps will get written too. + processDumpLocation string + // The CreateOpts used to create this uvm. These can be either of type // uvm.OptionsLCOW or uvm.OptionsWCOW createOpts interface{} + // Network config proxy client. If nil then this wasn't requested and the // uvms network will be configured locally. ncProxyClient ncproxyttrpc.NetworkConfigProxyService