diff --git a/docker/Dockerfile.demo_vitis_ai b/docker/Dockerfile.demo_vitis_ai
index 58326b66bf0c..8cc623e2f38c 100644
--- a/docker/Dockerfile.demo_vitis_ai
+++ b/docker/Dockerfile.demo_vitis_ai
@@ -20,10 +20,13 @@ FROM xilinx/vitis-ai:latest
RUN apt-get update --fix-missing
-
COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
RUN bash /install/ubuntu_install_core.sh
+# Install Vitis-AI ubuntu dependencies
+COPY install/ubuntu_install_vitis_ai_core.sh /install/ubuntu_install_vitis_ai_core.sh
+RUN bash /install/ubuntu_install_vitis_ai_core.sh
+
COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
RUN bash /install/ubuntu_install_python.sh
@@ -43,10 +46,6 @@ ENV PATH $PATH:$CARGO_HOME/bin:/usr/lib/go-1.10/bin
COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh
RUN bash /install/ubuntu_install_java.sh
-# Install Vitis-AI ubuntu dependencies
-COPY install/ubuntu_install_vitis_ai_core.sh /install/ubuntu_install_vitis_ai_core.sh
-RUN bash /install/ubuntu_install_vitis_ai_core.sh
-
# Install dependencies inside vitis-ai-tensorflow conda
RUN . $VAI_ROOT/conda/etc/profile.d/conda.sh && \
conda activate vitis-ai-tensorflow && \
diff --git a/docker/install/ubuntu_install_vitis_ai_core.sh b/docker/install/ubuntu_install_vitis_ai_core.sh
index ea05ffd170fe..a2d7c2ebe332 100644
--- a/docker/install/ubuntu_install_vitis_ai_core.sh
+++ b/docker/install/ubuntu_install_vitis_ai_core.sh
@@ -21,9 +21,9 @@ set -u
set -o pipefail
# install libraries for building Vitis-AI on ubuntu
-apt-get update && apt-get install -y --no-install-recommends \
- graphviz\
- gnupg2
-
-apt-get update && apt-get install -y gcc-aarch64-linux-gnu
-
+apt-get update && apt-get install -y \
+ graphviz \
+ gnupg2 \
+ gpg-agent \
+ gcc-aarch64-linux-gnu \
+ && rm -rf /var/lib/apt/lists/*
diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst
index df29f16f9d8d..7de8f58ce54f 100755
--- a/docs/deploy/vitis_ai.rst
+++ b/docs/deploy/vitis_ai.rst
@@ -304,15 +304,22 @@ Edge hardware setup
This section provides instructions for setting up with the `Pynq `__ platform but
Petalinux based flows are also supported.
-1. Download the Pynq v2.5 image for your target (use Z1 or Z2 for
+1. Download the Pynq v2.6 image for your target (use Z1 or Z2 for
Ultra96 target depending on board version) Link to image:
- https://github.com/Xilinx/PYNQ/releases/tag/v2.5
+ https://github.com/Xilinx/PYNQ/releases/tag/v2.6.0
2. Follow Pynq instructions for setting up the board: `pynq
setup `__
-3. After connecting to the board, make sure to run as root. Execute
+3. After connecting to the board, make sure to run as root. **Execute**
``su``
-4. Set up DPU on Pynq by following the steps here: `DPU Pynq
- setup `__
+4. Set up DPU on Pynq:
+
+ .. code:: bash
+
+ git clone --branch v1.2.0 --recursive --shallow-submodules https://github.com/Xilinx/DPU-PYNQ.git
+ cd DPU-PYNQ/upgrade
+ make
+ pip3 install pynq-dpu==1.2.0
+
5. Run the following command to download the DPU bitstream:
.. code:: bash
@@ -343,7 +350,7 @@ interface between TVM and Vitis-AI tools.
.. code:: bash
apt-get install libhdf5-dev
- pip3 install pydot h5py
+ pip3 install pydot==1.4.1 h5py==2.8.0
2. Install PyXIR
@@ -362,16 +369,17 @@ interface between TVM and Vitis-AI tools.
mkdir build
cp cmake/config.cmake build
cd build
+ echo set\(USE_LLVM OFF\) >> config.cmake
echo set\(USE_VITIS_AI ON\) >> config.cmake
cmake ..
- make
+ make tvm_runtime -j$(nproc)
4. Install TVM
.. code:: bash
cd tvm/python
- pip3 install -e . --user
+ pip3 install -e .
5. Check whether the setup was successful in the Python shell:
@@ -441,7 +449,7 @@ TVM.
import tvm
import tvm.relay as relay
from tvm.contrib.target import vitis_ai
- from tvm.contrib import util, graph_runtime
+ from tvm.contrib import utils, graph_runtime
from tvm.relay.build_module import bind_params_by_name
from tvm.relay.op.contrib.vitis_ai import annotation
@@ -524,6 +532,8 @@ model in TVM with Vitis-AI at the edge. The first couple of steps will
have to be run on the host machine and take care of quantization and
compilation for deployment at the edge.
+A complete ResNet 18 example can be found `here `__.
+
Host steps
^^^^^^^^^^
@@ -541,7 +551,7 @@ TVM.
import tvm
import tvm.relay as relay
from tvm.contrib.target import vitis_ai
- from tvm.contrib import util, graph_runtime
+ from tvm.contrib import utils, graph_runtime
from tvm.relay.build_module import bind_params_by_name
from tvm.relay.op.contrib.vitis_ai import annotation
@@ -549,12 +559,47 @@ After importing a convolutional neural network model using the usual
Relay API's, annotate the Relay expression for the given Vitis-AI DPU
target and partition the graph.
+.. note::
+
+ We recommend converting DPU convolutions' data layouts to NHWC and CPU convolutions'
+ data layouts to NCHW for best DPU performance and out-of-the-box CPU performance. You can use the
+ ConvertLayout transformation pass two times to achieve this as demonstrated in the code
+ block underneath. You can also leave the CPU convolution layouts in NHWC and tune ARM CPU
+ performance for this data layout to avoid the layout transformation overheads introduced by
+ executing DPU convolutions in NHWC and CPU convolutions in NCHW
+ (check out the `AutoScheduling `__
+ and `AutoTuning `__
+ tutorials for this).
+
.. code:: python
mod["main"] = bind_params_by_name(mod["main"], params)
+
+ # For edge DPU we recommend converting the convolutions' data layout
+ # to NHWC for best performance. Therefore, we first convert the layouts
+ # of all convolutions to NHWC before partitioning. Afterwards, we can
+ # convert any remaining convolutions (to be executed on CPU) back to NCHW.
+ desired_layouts = {'nn.conv2d': ['NHWC', 'default']}
+ seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
+ relay.transform.ConvertLayout(desired_layouts),
+ relay.transform.FoldConstant()])
+ with tvm.transform.PassContext(opt_level=3):
+ mod = seq(mod)
+
+ # Annotate and partition the Relay expression for the given target
mod = annotation(mod, params, target)
mod = relay.transform.MergeCompilerRegions()(mod)
mod = relay.transform.PartitionGraph()(mod)
+
+ # After partitioning we recommend transforming the remaining convolutions
+ # (that will be executed on CPU, if any) back to NCHW data layout
+ # for best CPU performance
+ desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
+ seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
+ relay.transform.ConvertLayout(desired_layouts),
+ relay.transform.FoldConstant()])
+ with tvm.transform.PassContext(opt_level=3):
+ mod = seq(mod)
Now, we can build the TVM runtime library for executing the model. The
TVM target is 'llvm' as the operations that can't be handled by the DPU
@@ -572,13 +617,9 @@ can be included.
.. code:: python
- from tvm.contrib import util
-
- temp = util.tempdir()
-
tvm_target = 'llvm'
target='DPUCZDX8G-zcu104'
- export_rt_mod_file = temp.relpath("vitis_ai.rtmod")
+ export_rt_mod_file = "vitis_ai.rtmod"
with tvm.transform.PassContext(opt_level=3, config= {'relay.ext.vitis_ai.options.target': target,
'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}):
@@ -604,9 +645,9 @@ Save the TVM lib module so that the Vitis-AI runtime module will also be exporte
.. code:: python
- from tvm.contrib import util
+ from tvm.contrib import utils
- temp = util.tempdir()
+ temp = utils.tempdir()
lib.export_library(temp.relpath("tvm_lib.so"))
After quantizing and compiling the model for Vitis-AI acceleration using the
@@ -638,15 +679,31 @@ Edge steps
^^^^^^^^^^
After setting up TVM with Vitis-AI on the edge device, you can now load
-the TVM runtime module into memory and feed inputs for inference.
+the TVM runtime module into memory and feed inputs for inference. A nearly
+complete runtime script can be found underneath. Make sure to run the script
+as root (execute ``su`` in terminal to log into root).
+
+
+.. note::
+
+ You will see a warning about the 'cpu-tf' runtime not being found. This warning is
+ expected on the board and can be ignored. Note also that you **shouldn't** import the
+ PyXIR targets in the run script (``import pyxir.contrib.target.DPUCZDX8G``).
.. code:: python
+ import pyxir
+ import tvm
+ from tvm.contrib import graph_runtime
+
ctx = tvm.cpu()
+
+ # input_name = ...
+ # input_data = ...
# load the module into memory
lib = tvm.runtime.load_module("tvm_dpu_arm.so")
module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
- module.set_input(name, data)
+ module.set_input(input_name, input_data)
module.run()
diff --git a/python/tvm/contrib/target/vitis_ai.py b/python/tvm/contrib/target/vitis_ai.py
index d4931d9e3f48..f319fd799829 100644
--- a/python/tvm/contrib/target/vitis_ai.py
+++ b/python/tvm/contrib/target/vitis_ai.py
@@ -132,14 +132,14 @@ def vitis_ai_compiler(ref):
layers = xgraph.get_layers()
# Get the output tensor names using XGraph and output Relay ids
- out_tensor_names = []
+ out_tensor_names = ["unknown_name"] * len(output_relay_ids)
for layer in layers:
if not layer.internal:
for relay_id in layer.attrs["relay_id"]:
if relay_id in output_relay_ids:
- out_tensor_names.append(layer.name)
+ out_tensor_names[output_relay_ids.index(relay_id)] = layer.name
break
- if not out_tensor_names:
+ if any([name == "unknown_name" for name in out_tensor_names]):
raise ValueError(
"During codegeneration the loading of subexpression \
failed due to output tensor name mismatch in Relay PyXIR interface."
diff --git a/python/tvm/relay/op/contrib/vitis_ai.py b/python/tvm/relay/op/contrib/vitis_ai.py
index fa17c63fc00a..aaa9f99e61ed 100644
--- a/python/tvm/relay/op/contrib/vitis_ai.py
+++ b/python/tvm/relay/op/contrib/vitis_ai.py
@@ -85,6 +85,10 @@ def visit_call(self, call):
def annotation(mod, params, target):
"""Annotate Relay expression for Vitis-AI DPU accelerators"""
+ # We need type information for supporting models that contain operations that don't
+ # have a Relay to XLayer translation
+ mod = relay.transform.InferType()(mod)
+
xgraph = pyxir.frontend.tvm.from_relay(mod, params, postprocessing=None)
xgraph = pyxir.partition(xgraph, targets=[target])