
[Bug] Repeated inference with dynamic shape leads to out of memory error #8233

@dvhg

Description

I'm trying to run PyTorch MaskRCNN on GPU and have been running into GPU memory issues. I get errors when running repeated inferences with different inputs. The error messages vary, but this is the most common:

terminate called after throwing an instance of 'dmlc::Error'
  what():  [20:11:56] /home/ubuntu/tvm/include/tvm/runtime/device_api.h:260: unknown type =0

Watching GPU memory usage with nvidia-smi, I see it increase over time until the test crashes as it approaches the maximum. I'm running this on Ubuntu 18.04 with a T4 GPU (16 GB of GPU memory).
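To make the growth easier to see per iteration, a small helper along these lines can print the used memory after each run (a rough sketch that just shells out to nvidia-smi, so it assumes the binary is on the PATH; the helper name is mine):

import subprocess

def gpu_mem_used_mib(device_index=0):
    # Used memory in MiB for one GPU, as reported by nvidia-smi.
    out = subprocess.check_output(
        ["nvidia-smi", "-i", str(device_index),
         "--query-gpu=memory.used", "--format=csv,noheader,nounits"]
    )
    return int(out.decode().strip())

Calling gpu_mem_used_mib() next to the print(i) in the loop below makes the per-inference growth visible without watching nvidia-smi by hand.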

Following the form of the unit test from test_tensorrt.py, the script below should reproduce the problem I'm seeing (using the COCO dataset). It differs from the unit test in two ways:

  1. The VM is run on GPU instead of CPU:

     ctx = tvm.gpu(0)
     vm = VirtualMachine(vm_exec, ctx)

  2. Inference is run on many different inputs (from the COCO dataset) rather than a single input.

@masahi, I heard you've been working on PyTorch MaskRCNN. Have you seen this issue in your testing, or is there a problem in my script? Thank you!

import os

import tvm
from tvm import relay
from tvm.runtime.vm import VirtualMachine

import numpy as np
import cv2

import torch
import torchvision

in_size = 300

input_shape = (1, 3, in_size, in_size)


def do_trace(model, inp):
    model_trace = torch.jit.trace(model, inp)
    model_trace.eval()
    return model_trace


def dict_to_tuple(out_dict):
    if "masks" in out_dict.keys():
        return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"]
    return out_dict["boxes"], out_dict["scores"], out_dict["labels"]


class TraceWrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, inp):
        out = self.model(inp)
        return dict_to_tuple(out[0])


# Wrap the pretrained torchvision MaskRCNN so its dict outputs become a flat tuple,
# then trace it for the TVM PyTorch frontend.
model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
model = TraceWrapper(model_func(pretrained=True))

model.eval()
inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=(1, 3, in_size, in_size)))

with torch.no_grad():
    out = model(inp)
    script_module = do_trace(model, inp)

# Import the traced model into Relay and compile it for CUDA with the VM compiler.
input_name = "input0"
shape_list = [(input_name, input_shape)]
mod, params = relay.frontend.from_pytorch(script_module, shape_list)
target = "cuda"

with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
    vm_exec = relay.vm.compile(mod, target=target, params=params)

# Run the VM on GPU (the first difference from the unit test).
ctx = tvm.gpu(0)
vm = VirtualMachine(vm_exec, ctx)

# Run repeated inference over a local subset of COCO val2017 images
# (the second difference from the unit test).
img_dirpath = 'data/COCO_2017/subset/val2017/'
i = 0
for root, dirs, files in os.walk(img_dirpath):
    for f in files:
        print(i)
        i += 1
        imgname = os.path.join(root, f)
        # Load the image, resize to the model input size, convert BGR -> RGB,
        # scale to [0, 1], and rearrange to NCHW with a batch dimension.
        img = cv2.imread(imgname)
        img = cv2.resize(img, (in_size, in_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img / 255.0, [2, 0, 1])
        img = np.expand_dims(img, axis=0).astype('float32')
        vm.set_input("main", **{input_name: img})
        tvm_res = vm.run()
