-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Move the allocates of AoT codegen to be TVMBAWs #9065
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| generate_ref_data, | ||
| convert_to_relay, | ||
| compile_and_run, | ||
| compile_models, | ||
| parametrize_aot_options, | ||
| ) | ||
|
|
||
|
|
@@ -643,5 +644,45 @@ def test_memory_planning(workspace_byte_alignment, main_workspace_size, sum_work | |
| ) | ||
|
|
||
|
|
||
| def test_aot_codegen_backend_alloc_workspace_calls(): | ||
| """This test checks whether AoT lowering creates TVMBackendAllocWorkspace calls""" | ||
|
|
||
| # The %data and %weight shapes in the following primitive Relay should create | ||
| # small tensors that would get lowered to stack allocations in the CPU PrimFuncs. | ||
| # However, the AoT executor codegen should retain them as TVMBAW calls | ||
| relay_mod = tvm.parser.fromtext( | ||
| """ | ||
| #[version = "0.0.5"] | ||
| def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] { | ||
| %0 = fn (%p02: Tensor[(1, 4, 4, 4), float32], Primitive=1, hash="9332b3872fb5292c", src_layout="NCHW", dst_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] { | ||
| layout_transform(%p02, src_layout="NCHW", dst_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */ | ||
| }; | ||
| %1 = fn (%p03: Tensor[(4, 4, 3, 3), float32], Primitive=1, hash="9f0b2b8a24a4dab3", src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 1, 3, 3, 4, 4), float32] { | ||
| layout_transform(%p03, src_layout="OIHW", dst_layout="OIHW4i4o") /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */ | ||
| }; | ||
| %2 = %0(%data) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */; | ||
| %3 = %1(%weight) /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */; | ||
| %4 = fn (%p01: Tensor[(1, 1, 4, 4, 4), float32], %p1: Tensor[(1, 1, 3, 3, 4, 4), float32], out_layout="NCHW4c", kernel_layout="OIHW4i4o", Primitive=1, data_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] { | ||
| nn.contrib_conv2d_NCHWc(%p01, %p1, padding=[1, 1, 1, 1], channels=4, kernel_size=[3, 3], data_layout="NCHW4c", kernel_layout="OIHW4i4o", out_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */ | ||
| }; | ||
| %5 = %4(%2, %3) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */; | ||
| %6 = fn (%p0: Tensor[(1, 1, 4, 4, 4), float32], Primitive=1, src_layout="NCHW4c", dst_layout="NCHW") -> Tensor[(1, 4, 4, 4), float32] { | ||
| layout_transform(%p0, src_layout="NCHW4c", dst_layout="NCHW") /* ty=Tensor[(1, 4, 4, 4), float32] */ | ||
| }; | ||
| %6(%5) /* ty=Tensor[(1, 4, 4, 4), float32] */ | ||
| } | ||
| """ | ||
| ) | ||
| compiled_runtime_modules = compile_models( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. want to assert that all the tir.allocate nodes are correctly tagged somewhere?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, it is a bit cumbersome to do that :). Instead I used relay in primitive form so it's clear that the main function should only have three allocates.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @mbs-octoml @denise-k can we add a tracking/cleanup task to make this kind of assert easier to write? And flag to clean up this test?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @areusch roadmap item and task tracking have been created. |
||
| AOTTestModel(module=relay_mod, inputs=None, outputs=None), | ||
| "c", | ||
| True, | ||
| ) | ||
| source = compiled_runtime_modules[0].lib.imported_modules[0].get_source() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The AoT codegen for main ends up as an imported module? Naively I would expect the TVMBackendAllocWorkspace calls in the imported_modules list to be intra-op only, e.g. for the conv2d, not AoT main.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I can understand the reasoning, but the current flow just creates per-target IRModules just before runtime.Modules are created. Therefore all host_target (i.e. CPU) PrimFuncs end up in a single runtime.Module. |
||
| # There should be three allocates created for three primitive relay function | ||
| # calls in the main for the above relay snippet. | ||
| assert source.count("TVMBackendAllocWorkspace") == 3 | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(pytest.main([__file__] + sys.argv[1:])) | ||
Uh oh!
There was an error while loading. Please reload this page.