From 34263b514437c1f2b28a7c85b799cbe5dc7a8b28 Mon Sep 17 00:00:00 2001
From: Junru Shao
Date: Wed, 9 Sep 2020 12:35:32 -0700
Subject: [PATCH] [Target] Tags, Composite Target, Unified Interface

* Add `set_attrs_preprocessor` to the TargetKind registry, which is used to pre-process attribute maps.
* Use `set_attrs_preprocessor` in the NVPTX and ROCm backends to check and add `mcpu` and `mtriple`.
* Add TargetTag registration and retrieval on both the C++ and Python sides, and allow creating a Target from a tag name.
* Unify target creation on the C++ side: replace Target::Create and Target::FromConfig with the constructor.
* Unify target creation on the Python side: deprecate tvm.target.create and encourage direct use of the tvm.target.Target constructor instead.
* Add initial support for composite targets.
---
 apps/benchmark/gpu_imagenet_bench.py | 2 +- apps/topi_recipe/conv/test_conv_int8_arm.py | 2 +- apps/topi_recipe/conv/test_conv_int8_intel.py | 2 +- apps/topi_recipe/gemm/gemm_int8.py | 2 +- include/tvm/target/tag.h | 155 ++++ include/tvm/target/target.h | 110 +-- include/tvm/target/target_kind.h | 94 ++- python/tvm/autotvm/feature.py | 13 +- .../autotvm/graph_tuner/base_graph_tuner.py | 68 +- python/tvm/autotvm/measure/measure_methods.py | 32 +- python/tvm/autotvm/record.py | 33 +- python/tvm/autotvm/task/dispatcher.py | 8 +- python/tvm/autotvm/task/task.py | 50 +- python/tvm/autotvm/task/topi_integration.py | 9 +- python/tvm/autotvm/tophub.py | 22 +- python/tvm/contrib/peak.py | 22 +- python/tvm/driver/build_module.py | 14 +- python/tvm/relay/backend/compile_engine.py | 11 +- .../relay/backend/graph_runtime_codegen.py | 11 +- python/tvm/relay/backend/vm.py | 12 +- python/tvm/relay/build_module.py | 32 +- python/tvm/relay/testing/py_converter.py | 4 +- python/tvm/target/__init__.py | 3 +- python/tvm/target/codegen.py | 4 +- python/tvm/target/intrin.py | 5 +- python/tvm/target/tag.py | 78 ++ python/tvm/target/target.py | 149 ++-- python/tvm/te/hybrid/calls.py | 6 +- python/tvm/te/hybrid/runtime.py | 67 +- .../tvm/topi/cuda/conv2d_hwnc_tensorcore.py | 8 +- python/tvm/topi/cuda/softmax.py | 5 +- python/tvm/topi/generic/__init__.py | 2 +- python/tvm/topi/testing/common.py | 2 +- rust/tvm/examples/resnet/src/build_resnet.py | 2 +- src/auto_scheduler/measure_record.cc | 2 +- src/driver/driver_api.cc | 8 +- src/relay/backend/build_module.cc | 8 +- src/relay/backend/compile_engine.cc | 2 +- src/relay/backend/graph_runtime_codegen.cc | 2 +- src/relay/backend/interpreter.cc | 2 +- src/relay/backend/vm/compiler.cc | 8 +- src/relay/transforms/fold_constant.cc | 2 +- src/relay/transforms/partial_eval.cc | 2 +- src/target/build_common.h | 16 - src/target/llvm/codegen_amdgpu.cc | 64 +- src/target/llvm/codegen_blob.cc | 2 +- src/target/llvm/codegen_nvptx.cc | 34 +- src/target/llvm/llvm_module.cc | 10 +- src/target/tag.cc | 77 ++ src/target/target.cc | 784 +++++++++--------- src/target/target_kind.cc | 335 +++++--- src/topi/schedule.cc | 2 +- tests/cpp/build_module_test.cc | 8 +- tests/cpp/relay_build_module_test.cc | 2 +- tests/cpp/relay_transform_sequential_test.cc | 2 +- tests/cpp/target_test.cc | 19 +- tests/cpp/utvm_runtime_standalone_test.cc | 2 +- .../contrib/test_ethosn/infrastructure.py | 4 +- tests/python/integration/test_ewise.py | 4 +- tests/python/integration/test_gemm.py | 2 +- .../integration/test_winograd_nnpack.py | 2 +- .../relay/test_backend_compile_engine.py | 4 +- .../python/relay/test_backend_interpreter.py | 2 +- .../python/relay/test_pass_alter_op_layout.py | 6 +-
tests/python/relay/test_pass_auto_quantize.py | 2 +- tests/python/relay/test_pass_fold_constant.py | 2 +- tests/python/relay/test_pass_manager.py | 2 +- tests/python/relay/test_pass_qnn_legalize.py | 36 +- tests/python/topi/python/test_fifo_buffer.py | 4 +- .../topi/python/test_topi_batch_matmul.py | 2 +- .../topi/python/test_topi_bitserial_conv2d.py | 4 +- .../python/test_topi_bitserial_conv2d_rasp.py | 2 +- tests/python/topi/python/test_topi_bnn.py | 2 +- .../python/topi/python/test_topi_broadcast.py | 10 +- tests/python/topi/python/test_topi_clip.py | 2 +- tests/python/topi/python/test_topi_conv1d.py | 2 +- .../python/test_topi_conv1d_transpose_ncw.py | 2 +- .../topi/python/test_topi_conv2d_NCHWc.py | 2 +- .../topi/python/test_topi_conv2d_hwcn.py | 2 +- .../test_topi_conv2d_hwnc_tensorcore.py | 2 +- .../topi/python/test_topi_conv2d_int8.py | 8 +- .../topi/python/test_topi_conv2d_nchw.py | 2 +- .../topi/python/test_topi_conv2d_nhwc.py | 2 +- .../python/test_topi_conv2d_nhwc_pack_int8.py | 2 +- .../test_topi_conv2d_nhwc_tensorcore.py | 2 +- .../python/test_topi_conv2d_nhwc_winograd.py | 2 +- .../python/test_topi_conv2d_transpose_nchw.py | 2 +- .../topi/python/test_topi_conv2d_winograd.py | 2 +- .../topi/python/test_topi_conv3d_ncdhw.py | 2 +- .../topi/python/test_topi_conv3d_ndhwc.py | 2 +- .../test_topi_conv3d_ndhwc_tensorcore.py | 2 +- .../test_topi_conv3d_transpose_ncdhw.py | 2 +- .../topi/python/test_topi_conv3d_winograd.py | 2 +- .../topi/python/test_topi_correlation.py | 2 +- .../python/test_topi_deformable_conv2d.py | 2 +- tests/python/topi/python/test_topi_dense.py | 4 +- .../topi/python/test_topi_dense_tensorcore.py | 2 +- .../topi/python/test_topi_depth_to_space.py | 2 +- .../topi/python/test_topi_depthwise_conv2d.py | 6 +- .../topi/python/test_topi_group_conv2d.py | 4 +- .../test_topi_group_conv2d_NCHWc_int8.py | 2 +- tests/python/topi/python/test_topi_image.py | 10 +- tests/python/topi/python/test_topi_lrn.py | 2 +- tests/python/topi/python/test_topi_math.py | 10 +- tests/python/topi/python/test_topi_pooling.py | 12 +- tests/python/topi/python/test_topi_reduce.py | 2 +- tests/python/topi/python/test_topi_relu.py | 2 +- tests/python/topi/python/test_topi_reorg.py | 2 +- tests/python/topi/python/test_topi_softmax.py | 2 +- tests/python/topi/python/test_topi_sort.py | 4 +- .../topi/python/test_topi_space_to_depth.py | 2 +- tests/python/topi/python/test_topi_sparse.py | 4 +- tests/python/topi/python/test_topi_tensor.py | 2 +- .../python/topi/python/test_topi_transform.py | 60 +- .../topi/python/test_topi_upsampling.py | 4 +- tests/python/topi/python/test_topi_vision.py | 14 +- .../test_auto_scheduler_cost_model.py | 8 +- ...test_auto_scheduler_evolutionary_search.py | 11 +- .../unittest/test_auto_scheduler_feature.py | 31 +- .../unittest/test_auto_scheduler_measure.py | 19 +- .../test_auto_scheduler_search_policy.py | 13 +- .../test_auto_scheduler_sketch_generation.py | 2 +- tests/python/unittest/test_autotvm_common.py | 15 +- tests/python/unittest/test_autotvm_measure.py | 6 +- .../unittest/test_runtime_heterogeneous.py | 16 +- .../unittest/test_target_codegen_cuda.py | 168 ++-- .../unittest/test_target_codegen_llvm.py | 143 ++-- .../unittest/test_target_custom_datatypes.py | 2 +- tests/python/unittest/test_target_target.py | 29 +- .../python/unittest/test_te_hybrid_script.py | 2 +- .../unittest/test_te_tensor_overload.py | 38 +- .../test_tir_analysis_verify_memory.py | 122 ++- .../test_tir_transform_lower_intrin.py | 73 +- .../test_tir_transform_lower_warp_memory.py | 46 +- 
.../test_tir_transform_make_packed_api.py | 2 +- .../test_tir_transform_thread_sync.py | 11 +- tutorials/autotvm/tune_conv2d_cuda.py | 2 +- tutorials/autotvm/tune_relay_arm.py | 2 +- tutorials/autotvm/tune_relay_mobile_gpu.py | 2 +- tutorials/autotvm/tune_simple_template.py | 2 +- tutorials/dev/use_pass_infra.py | 4 +- tutorials/frontend/deploy_model_on_rasp.py | 4 +- tutorials/language/tedd.py | 2 +- .../optimize/opt_matmul_auto_tensorcore.py | 2 +- tutorials/topi/intro_topi.py | 4 +- 145 files changed, 2017 insertions(+), 1486 deletions(-) create mode 100644 include/tvm/target/tag.h create mode 100644 python/tvm/target/tag.py create mode 100644 src/target/tag.cc diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index a3df2c46a24b..a65a9e885749 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -78,7 +78,7 @@ def benchmark(network, target): else: networks = [args.network] - target = tvm.target.create('%s -device=%s -model=%s' % (args.target, args.device, args.model)) + target = tvm.target.Target('%s -device=%s -model=%s' % (args.target, args.device, args.model)) print("--------------------------------------------------") print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)")) diff --git a/apps/topi_recipe/conv/test_conv_int8_arm.py b/apps/topi_recipe/conv/test_conv_int8_arm.py index d4f98b0ac4d7..ba8d0a44f6d9 100644 --- a/apps/topi_recipe/conv/test_conv_int8_arm.py +++ b/apps/topi_recipe/conv/test_conv_int8_arm.py @@ -110,7 +110,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f c_sch = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX) - with tvm.target.create(TARGET_NAME): + with tvm.target.Target(TARGET_NAME): if out_dtype == "float32": conv = topi.nn.conv2d_NCHWc(data, kernel, stride=hstride, padding=hpad, dilation=(1, 1), diff --git a/apps/topi_recipe/conv/test_conv_int8_intel.py b/apps/topi_recipe/conv/test_conv_int8_intel.py index 93b783340ee1..3edfc03e7440 100644 --- a/apps/topi_recipe/conv/test_conv_int8_intel.py +++ b/apps/topi_recipe/conv/test_conv_int8_intel.py @@ -111,7 +111,7 @@ def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_f c_sch = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX) - with tvm.target.create(TARGET_NAME): + with tvm.target.Target(TARGET_NAME): conv = topi.nn.conv2d_NCHWc(data, kernel, stride=hstride, padding=hpad, dilation=(1, 1), layout='NCHWc', out_layout='NCHWc', out_dtype=out_dtype) diff --git a/apps/topi_recipe/gemm/gemm_int8.py b/apps/topi_recipe/gemm/gemm_int8.py index fd037117d77b..9362d71a68da 100644 --- a/apps/topi_recipe/gemm/gemm_int8.py +++ b/apps/topi_recipe/gemm/gemm_int8.py @@ -143,7 +143,7 @@ def block_size_filter(entity): print(config) with dispatch_context: - with tvm.target.create('cuda'): + with tvm.target.Target('cuda'): s, arg_bufs = gemm_int8(n, m, l) f = tvm.build(s, arg_bufs, 'cuda', name='gemm_int8') diff --git a/include/tvm/target/tag.h b/include/tvm/target/tag.h new file mode 100644 index 000000000000..7add206f3ec5 --- /dev/null +++ b/include/tvm/target/tag.h @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/target/tag.h + * \brief Target tag registry + */ +#ifndef TVM_TARGET_TAG_H_ +#define TVM_TARGET_TAG_H_ + +#include +#include +#include + +#include + +namespace tvm { + +/*! \brief A target tag */ +class TargetTagNode : public Object { + public: + /*! \brief Name of the target */ + String name; + /*! \brief Config map to generate the target */ + Map config; + + void VisitAttrs(AttrVisitor* v) { + v->Visit("name", &name); + v->Visit("config", &config); + } + + static constexpr const char* _type_key = "TargetTag"; + TVM_DECLARE_FINAL_OBJECT_INFO(TargetTagNode, Object); + + private: + /*! \brief Return the index stored in attr registry */ + uint32_t AttrRegistryIndex() const { return index_; } + /*! \brief Return the name stored in attr registry */ + String AttrRegistryName() const { return name; } + /*! \brief Index used for internal lookup of attribute registry */ + uint32_t index_; + + template + friend class AttrRegistry; + template + friend class AttrRegistryMapContainerMap; + friend class TargetTagRegEntry; +}; + +/*! + * \brief Managed reference class to TargetTagNode + * \sa TargetTagNode + */ +class TargetTag : public ObjectRef { + public: + /*! + * \brief Retrieve the Target given it the name of target tag + * \param target_tag_name Name of the target tag + * \return The Target requested + */ + TVM_DLL static Optional Get(const String& target_tag_name); + /*! + * \brief List all names of the existing target tags + * \return A dictionary that maps tag name to the concrete target it corresponds to + */ + TVM_DLL static Map ListTags(); + /*! + * \brief Add a tag into the registry + * \param name Name of the tag + * \param config The target config corresponding to the tag + * \param override Allow overriding existing tags + * \return Target created with the tag + */ + TVM_DLL static Target AddTag(String name, Map config, bool override); + + TVM_DEFINE_OBJECT_REF_METHODS(TargetTag, ObjectRef, TargetTagNode); + + private: + /*! \brief Mutable access to the container class */ + TargetTagNode* operator->() { return static_cast(data_.get()); } + friend class TargetTagRegEntry; +}; + +class TargetTagRegEntry { + public: + /*! + * \brief Set the config dict corresponding to the target tag + * \param config The config dict for target creation + */ + inline TargetTagRegEntry& set_config(Map config); + /*! \brief Set name of the TargetTag to be the same as registry if it is empty */ + inline TargetTagRegEntry& set_name(); + /*! + * \brief Register or get a new entry. + * \param target_tag_name The name of the TargetTag. + * \return the corresponding entry. + */ + TVM_DLL static TargetTagRegEntry& RegisterOrGet(const String& target_tag_name); + + private: + TargetTag tag_; + String name; + + /*! 
\brief private constructor */ + explicit TargetTagRegEntry(uint32_t reg_index) : tag_(make_object()) { + tag_->index_ = reg_index; + } + template + friend class AttrRegistry; + friend class TargetTag; +}; + +inline TargetTagRegEntry& TargetTagRegEntry::set_config(Map config) { + tag_->config = std::move(config); + return *this; +} + +inline TargetTagRegEntry& TargetTagRegEntry::set_name() { + if (tag_->name.empty()) { + tag_->name = name; + } + return *this; +} + +#define TVM_TARGET_TAG_REGISTER_VAR_DEF \ + static DMLC_ATTRIBUTE_UNUSED ::tvm::TargetTagRegEntry& __make_##TargetTag + +/*! + * \def TVM_REGISTER_TARGET_TAG + * \brief Register a new target tag, or set attribute of the corresponding target tag. + * \param TargetTagName The name of target tag + */ +#define TVM_REGISTER_TARGET_TAG(TargetTagName) \ + TVM_STR_CONCAT(TVM_TARGET_TAG_REGISTER_VAR_DEF, __COUNTER__) = \ + ::tvm::TargetTagRegEntry::RegisterOrGet(TargetTagName).set_name() + +} // namespace tvm + +#endif // TVM_TARGET_TAG_H_ diff --git a/include/tvm/target/target.h b/include/tvm/target/target.h index 4d5fba39f506..2abdb558baf8 100644 --- a/include/tvm/target/target.h +++ b/include/tvm/target/target.h @@ -24,22 +24,20 @@ #ifndef TVM_TARGET_TARGET_H_ #define TVM_TARGET_TARGET_H_ -#include -#include -#include +#include #include #include #include -#include #include -#include #include namespace tvm { + +class TargetInternal; + /*! * \brief Compilation target. - * \note Use target::llvm, target::cuda etc functions. * \sa Target */ class TargetNode : public Object { @@ -52,7 +50,11 @@ class TargetNode : public Object { Array keys; /*! \brief Collection of attributes */ Map attrs; - /*! \return the full device string to pass to codegen::Build */ + /*! + * \brief The raw string representation of the target + * \return the full device string to pass to codegen::Build + * \note It will be deprecated after the Target RFC is fully landed. + */ TVM_DLL const std::string& str() const; /*! \return Export target to JSON-like configuration */ TVM_DLL Map Export() const; @@ -106,27 +108,8 @@ class TargetNode : public Object { private: /*! \brief Internal string repr. */ mutable std::string str_repr_; - /*! - * \brief Parsing TargetNode::attrs from a list of raw strings - * \param obj The attribute to be parsed - * \param info The runtime type information for parsing - * \return The attribute parsed - */ - ObjectRef ParseAttr(const ObjectRef& obj, const TargetKindNode::ValueTypeInfo& info) const; - /*! - * \brief Parsing TargetNode::attrs from a list of raw strings - * \param options The raw string of fields to be parsed - * \return The attributes parsed - */ - Map ParseAttrsFromRaw(const std::vector& options) const; - /*! - * \brief Serialize the attributes of a target to raw string - * \param attrs The attributes to be converted to string - * \return The string converted, NullOpt if attrs is empty - */ - Optional StringifyAttrsToRaw(const Map& attrs) const; - friend class Target; + friend class TargetInternal; }; /*! @@ -135,31 +118,18 @@ class TargetNode : public Object { */ class Target : public ObjectRef { public: - Target() {} - /*! \brief Constructor from ObjectPtr */ - explicit Target(ObjectPtr n) : ObjectRef(n) {} + /*! \brief Construct a null Target */ + TVM_DLL explicit Target(std::nullptr_t) { data_ = nullptr; } /*! 
- * \brief Create a Target using a JSON-like configuration - * \param config The JSON-like configuration - * \return The target created + * \brief Construct a Target given a string + * \param tag_or_config_or_target_str the string to parse */ - TVM_DLL static Target FromConfig(const Map& config); + TVM_DLL explicit Target(const String& tag_or_config_or_target_str); /*! - * \brief Create a Target given a string - * \param target_str the string to parse - * \return The target created - */ - TVM_DLL static Target Create(const String& target_str); - /*! - * \brief Construct a Target node from the given name and options. - * \param name The major target name. Should be one of - * {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hexagon", "hybrid", "llvm", - * "metal", "nvptx", "opencl", "rocm", "sdaccel", "stackvm", "vulkan"} - * \param options Additional options appended to the target - * \return The constructed Target + * \brief Construct a Target using a JSON-like configuration + * \param config The JSON-like configuration */ - TVM_DLL static Target CreateTarget(const std::string& name, - const std::vector& options); + TVM_DLL explicit Target(const Map& config); /*! * \brief Get the current target context from thread local storage. * \param allow_not_defined If the context stack is empty and this is set to true, an @@ -170,14 +140,11 @@ class Target : public ObjectRef { */ TVM_DLL static tvm::Target Current(bool allow_not_defined = true); - const TargetNode* operator->() const { return static_cast(get()); } - - using ContainerType = TargetNode; - class Internal; + TVM_DEFINE_OBJECT_REF_METHODS(Target, ObjectRef, TargetNode); private: // enable with syntax. - friend class Internal; + friend class TargetInternal; friend class With; /*! * \brief Push a new target context onto the thread local stack. @@ -192,42 +159,5 @@ class Target : public ObjectRef { TVM_DLL void ExitWithScope(); }; -/*! \brief This namespace provides functions to construct Target instances */ -namespace target { - -/*! \return A target for LLVM */ -TVM_DLL Target llvm(const std::vector& options = std::vector()); - -/*! \return A target for CUDA */ -TVM_DLL Target cuda(const std::vector& options = std::vector()); - -/*! \return A target for ROCm */ -TVM_DLL Target rocm(const std::vector& options = std::vector()); - -/*! \return A target for OpenCL */ -TVM_DLL Target opencl(const std::vector& options = std::vector()); - -/*! \return A target for Metal */ -TVM_DLL Target metal(const std::vector& options = std::vector()); - -/*! \return A target for rasp */ -TVM_DLL Target rasp(const std::vector& options = std::vector()); - -/*! \return A target for Mali */ -TVM_DLL Target mali(const std::vector& options = std::vector()); - -/*! \return A target for Intel Graphics */ -TVM_DLL Target intel_graphics(const std::vector& options = std::vector()); - -/*! \return A target for stackvm */ -TVM_DLL Target stackvm(const std::vector& options = std::vector()); - -/*! \return A target for external device */ -TVM_DLL Target ext_dev(const std::vector& options = std::vector()); - -/*! 
\return A target for hexagon */ -TVM_DLL Target hexagon(const std::vector& options = std::vector()); -} // namespace target - } // namespace tvm #endif // TVM_TARGET_TARGET_H_ diff --git a/include/tvm/target/target_kind.h b/include/tvm/target/target_kind.h index e4e7c2fa8a4d..dd14602fa6fc 100644 --- a/include/tvm/target/target_kind.h +++ b/include/tvm/target/target_kind.h @@ -24,15 +24,10 @@ #ifndef TVM_TARGET_TARGET_KIND_H_ #define TVM_TARGET_TARGET_KIND_H_ -#include -#include #include -#include -#include -#include +#include #include -#include #include #include #include @@ -43,7 +38,7 @@ template struct ValueTypeInfoMaker; } -class Target; +class TargetInternal; template class TargetKindAttrMap; @@ -57,6 +52,8 @@ class TargetKindNode : public Object { int device_type; /*! \brief Default keys of the target */ Array default_keys; + /*! \brief Function used to preprocess on target creation */ + PackedFunc preprocessor; void VisitAttrs(AttrVisitor* v) { v->Visit("name", &name); @@ -85,16 +82,15 @@ class TargetKindNode : public Object { std::unordered_map key2default_; /*! \brief Index used for internal lookup of attribute registry */ uint32_t index_; - friend class Target; - friend class TargetNode; - friend class TargetKind; + + template + friend struct detail::ValueTypeInfoMaker; template friend class AttrRegistry; template friend class AttrRegistryMapContainerMap; friend class TargetKindRegEntry; - template - friend struct detail::ValueTypeInfoMaker; + friend class TargetInternal; }; /*! @@ -112,7 +108,7 @@ class TargetKind : public ObjectRef { * \param target_kind_name Name of the target kind * \return The TargetKind requested */ - TVM_DLL static const TargetKind& Get(const String& target_kind_name); + TVM_DLL static Optional Get(const String& target_kind_name); TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TargetKind, ObjectRef, TargetKindNode); private: @@ -120,10 +116,8 @@ class TargetKind : public ObjectRef { TargetKindNode* operator->() { return static_cast(data_.get()); } TVM_DLL static const AttrRegistryMapContainerMap& GetAttrMapContainer( const String& attr_name); - template - friend class AttrRegistry; friend class TargetKindRegEntry; - friend class Target; + friend class TargetInternal; }; /*! @@ -178,6 +172,13 @@ class TargetKindRegEntry { * \param keys The default keys */ inline TargetKindRegEntry& set_default_keys(std::vector keys); + /*! + * \brief Set the pre-processing function applied upon target creation + * \tparam FLambda Type of the function + * \param f The pre-processing function + */ + template + inline TargetKindRegEntry& set_attrs_preprocessor(FLambda f); /*! * \brief Register a valid configuration option and its ValueType for validation * \param key The configuration key @@ -222,29 +223,6 @@ class TargetKindRegEntry { friend class TargetKind; }; -#define TVM_TARGET_KIND_REGISTER_VAR_DEF \ - static DMLC_ATTRIBUTE_UNUSED ::tvm::TargetKindRegEntry& __make_##TargetKind - -/*! - * \def TVM_REGISTER_TARGET_KIND - * \brief Register a new target kind, or set attribute of the corresponding target kind. 
- * - * \param TargetKindName The name of target kind - * - * \code - * - * TVM_REGISTER_TARGET_KIND("llvm") - * .set_attr("TPreCodegenPass", a-pre-codegen-pass) - * .add_attr_option("system_lib") - * .add_attr_option("mtriple") - * .add_attr_option("mattr"); - * - * \endcode - */ -#define TVM_REGISTER_TARGET_KIND(TargetKindName) \ - TVM_STR_CONCAT(TVM_TARGET_KIND_REGISTER_VAR_DEF, __COUNTER__) = \ - ::tvm::TargetKindRegEntry::RegisterOrGet(TargetKindName).set_name() - namespace detail { template class Container> struct is_specialized : std::false_type { @@ -334,6 +312,13 @@ inline TargetKindRegEntry& TargetKindRegEntry::set_default_keys(std::vector +inline TargetKindRegEntry& TargetKindRegEntry::set_attrs_preprocessor(FLambda f) { + using FType = typename tvm::runtime::detail::function_signature::FType; + kind_->preprocessor = tvm::runtime::TypedPackedFunc(std::move(f)).packed(); + return *this; +} + template inline TargetKindRegEntry& TargetKindRegEntry::add_attr_option(const String& key) { CHECK(!kind_->key2vtype_.count(key)) @@ -357,6 +342,37 @@ inline TargetKindRegEntry& TargetKindRegEntry::set_name() { return *this; } +#define TVM_TARGET_KIND_REGISTER_VAR_DEF \ + static DMLC_ATTRIBUTE_UNUSED ::tvm::TargetKindRegEntry& __make_##TargetKind + +/*! + * \def TVM_REGISTER_TARGET_KIND + * \brief Register a new target kind, or set attribute of the corresponding target kind. + * + * \param TargetKindName The name of target kind + * \param DeviceType The DLDeviceType of the target kind + * + * \code + * + * TVM_REGISTER_TARGET_KIND("llvm") + * .set_attr("TPreCodegenPass", a-pre-codegen-pass) + * .add_attr_option("system_lib") + * .add_attr_option("mtriple") + * .add_attr_option("mattr"); + * + * \endcode + */ +#define TVM_REGISTER_TARGET_KIND(TargetKindName, DeviceType) \ + TVM_STR_CONCAT(TVM_TARGET_KIND_REGISTER_VAR_DEF, __COUNTER__) = \ + ::tvm::TargetKindRegEntry::RegisterOrGet(TargetKindName) \ + .set_name() \ + .set_device_type(DeviceType) \ + .add_attr_option>("keys") \ + .add_attr_option("tag") \ + .add_attr_option("device") \ + .add_attr_option("model") \ + .add_attr_option>("libs") + } // namespace tvm #endif // TVM_TARGET_TARGET_KIND_H_ diff --git a/python/tvm/autotvm/feature.py b/python/tvm/autotvm/feature.py index 0c0591ccf2a1..8df6e53c4844 100644 --- a/python/tvm/autotvm/feature.py +++ b/python/tvm/autotvm/feature.py @@ -30,10 +30,11 @@ import numpy as np import tvm._ffi -from tvm import target as _target +from tvm.target import Target from tvm.te import schedule from tvm.driver import build_module + def ana_lower(sch, args, binds=None, simple_mode=True): @@ -52,6 +53,7 @@ def ana_lower(sch, args, assert simple_mode return mod["main"].body + try: _get_buffer_curve_sample_flatten = tvm._ffi.get_global_func( "autotvm.feature.GetCurveSampleFeatureFlatten") @@ -65,6 +67,7 @@ def raise_error(*args, **kwargs): # pylint: disable=unused-argument _get_buffer_curve_sample_flatten = _get_itervar_feature = _get_itervar_feature_flatten = \ raise_error + def get_itervar_feature(sch, args, take_log=False): """get features of iter vars @@ -93,6 +96,7 @@ def get_itervar_feature(sch, args, take_log=False): ret.append(tmp) return ret + def flatten_itervar_feature(fea): """flatten features into one-dimensional feature vectors @@ -112,6 +116,7 @@ def flatten_itervar_feature(fea): flatten.append(pair[1:]) return np.concatenate(flatten) + def get_itervar_feature_flatten(sch, args, take_log=True): """get flatten features of iter vars this is equivalent to get_itervar_feature + flatten_itervar_feature, 
but much faster. @@ -134,6 +139,7 @@ def get_itervar_feature_flatten(sch, args, take_log=True): feas = struct.unpack('%df' % (len(feas)//4), feas) return feas + def get_flatten_name(fea): """ Get names of feature after flatten. @@ -163,7 +169,7 @@ def get_flatten_name(fea): if ret is None: raise ValueError("Unsupported AutoTVM log format") inp, _ = ret - target = _target.create(inp.target) + target = Target(inp.target) with target: s, args = inp.template.instantiate(inp.config) fea = get_itervar_feature(s, args) @@ -180,7 +186,8 @@ def get_flatten_name(fea): name_list = feature_name["buf_touch"] for i in range(len((pair[1:]))): - names.append(".".join(["f%d" % ct, var_name, key, name_list[i]])) + names.append( + ".".join(["f%d" % ct, var_name, key, name_list[i]])) ct += 1 return names diff --git a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py index fa0186025e02..5a1ef16107a8 100644 --- a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py +++ b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py @@ -29,13 +29,14 @@ from tvm.autotvm.record import encode, load_from_file from tvm.autotvm.measure import MeasureResult, MeasureInput -from ... import target as _target +from ...target import Target from .utils import is_boundary_node, get_in_nodes, get_out_nodes, has_multiple_inputs, \ bind_inputs, expr2graph from ._base import INVALID_LAYOUT_TIME from ._base import OPT_OUT_OP + def get_infer_layout(task_name): if task_name.startswith("conv2d"): return topi.nn.conv2d_infer_layout @@ -43,6 +44,7 @@ def get_infer_layout(task_name): return topi.nn.depthwise_conv2d_infer_layout raise ValueError("Cannot find infer layout for task %s" % task_name) + @autotvm.template("layout_transform") def layout_transform(*args): """Autotvm layout transform template.""" @@ -61,6 +63,7 @@ class BaseGraphTuner(object): Before creating a Graph Executor instance, schedule candidates for all kernels in graph should be provided through tensor-level tuning. 
""" + def __init__(self, graph, input_shapes, records, target_ops, target, max_sch_num=20, dtype="float32", verbose=True, log_file="graph_tuner.log", log_level=logging.DEBUG, @@ -111,7 +114,7 @@ def __init__(self, graph, input_shapes, records, target_ops, self._records = records self._dtype = dtype if isinstance(target, str): - target = _target.create(target) + target = Target(target) self._target = target self._optimal_record_dict = {} @@ -126,7 +129,8 @@ def __init__(self, graph, input_shapes, records, target_ops, need_console_handler = False self._log_level = log_level self._log_file = log_file - self._formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + self._formatter = logging.Formatter( + '%(asctime)s %(levelname)s %(message)s') self._logger.setLevel(log_level) if need_file_handler: file_handler = logging.FileHandler(log_file) @@ -151,7 +155,8 @@ def __init__(self, graph, input_shapes, records, target_ops, raise RuntimeError("Unsupported graph type: %s" % str(type(graph))) self._graph = graph - self._in_nodes_dict = get_in_nodes(self._node_list, self._target_ops, input_shapes.keys()) + self._in_nodes_dict = get_in_nodes( + self._node_list, self._target_ops, input_shapes.keys()) if len(self._in_nodes_dict) == 0: raise RuntimeError("Could not find any input nodes with whose " "operator is one of %s" % self._target_ops) @@ -180,9 +185,11 @@ def __init__(self, graph, input_shapes, records, target_ops, input_workload = input_node["workloads"][0] first_tensor = input_workload[1] dtype = first_tensor[-1] - new_shape = tuple([val.value for val in node_entry["types"][0].shape]) + new_shape = tuple( + [val.value for val in node_entry["types"][0].shape]) actual_workload = (input_workload[0],) + \ - (("TENSOR", new_shape, dtype),) + input_workload[2:] + (("TENSOR", new_shape, dtype),) + \ + input_workload[2:] node_entry["workloads"].append(actual_workload) if "record_candidates" not in node_entry: node_entry["record_candidates"] = input_node["record_candidates"] @@ -190,7 +197,6 @@ def __init__(self, graph, input_shapes, records, target_ops, node_entry["topi_op"].append(None) node_entry["workloads"].append(None) - def _fetch_cfg(self): """Read and pre-process input schedules.""" if isinstance(self._records, str): @@ -267,7 +273,8 @@ def _iterate_layout_transform(self, callback): if node_entry["op"] in self._target_ops: o_idx = key - o_infer_layout_func = get_infer_layout(node_entry["topi_op"][0]) + o_infer_layout_func = get_infer_layout( + node_entry["topi_op"][0]) o_wkl = node_entry["workloads"][0] i_topi_op = in_node_entry["topi_op"][0] i_wkl = in_node_entry["workloads"][0] @@ -281,9 +288,11 @@ def _iterate_layout_transform(self, callback): o_idx = target_input_idx if i <= target_input_pos: continue - o_infer_layout_func = get_infer_layout(node_entry["topi_op"][0]) + o_infer_layout_func = get_infer_layout( + node_entry["topi_op"][0]) o_wkl = node_entry["workloads"][target_input_pos] - i_infer_layout_func = get_infer_layout(node_entry["topi_op"][i]) + i_infer_layout_func = get_infer_layout( + node_entry["topi_op"][i]) i_wkl = node_entry["workloads"][i] if (i_idx, o_idx) in pair_tracker: @@ -294,8 +303,10 @@ def _iterate_layout_transform(self, callback): for n, o_record in enumerate(node_entry["record_candidates"]): i_cfg, o_cfg = i_record[0].config, o_record[0].config with self._target: - i_input_info, i_output_info = i_infer_layout_func(i_wkl, i_cfg) - o_input_info, o_output_info = o_infer_layout_func(o_wkl, o_cfg) + i_input_info, i_output_info = i_infer_layout_func( + 
i_wkl, i_cfg) + o_input_info, o_output_info = o_infer_layout_func( + o_wkl, o_cfg) if len(i_input_info) > 1 or len(i_output_info) > 1 or \ len(o_input_info) > 1 or len(o_output_info) > 1: raise RuntimeError("Graph tuner only supports target operator " @@ -312,7 +323,6 @@ def _iterate_layout_transform(self, callback): args = [data_placeholder, in_layout, out_layout] callback(i_idx, o_idx, m, n, args) - def _create_matrix_callback(self, from_node_idx, to_node_idx, from_sch_idx, to_sch_idx, args): """Create dictionary containing matrix format of layout transformation @@ -403,12 +413,14 @@ def benchmark_layout_transform(self, min_exec_num=100, timeout=10, """ self._logger.info("Start to benchmark layout transformation...") if layout_records is None and infer_layout: - raise RuntimeError("Requires some records to infer layout transformation time.") + raise RuntimeError( + "Requires some records to infer layout transformation time.") if isinstance(layout_records, str): layout_records = load_from_file(layout_records) if not layout_records and infer_layout: - raise RuntimeError("Records must be non-empty to infer layout transformation time.") + raise RuntimeError( + "Records must be non-empty to infer layout transformation time.") if isinstance(layout_records, str): layout_records = load_from_file(layout_records) @@ -424,6 +436,7 @@ def benchmark_layout_transform(self, min_exec_num=100, timeout=10, avg_time = total_time / num_flops if num_flops > 0 else 0 args_list = [] + def _fetch_args_callback(from_node_idx, to_node_idx, from_sch_idx, to_sch_idx, args): """Callback function to fetch layout transform args""" @@ -440,18 +453,22 @@ def _callback(_, inputs, results): record_list.append((inputs[0], results[0])) return _callback - builder = autotvm.LocalBuilder(n_parallel=n_parallel, build_func=build_func) - runner = autotvm.LocalRunner(number=min_exec_num, repeat=1, timeout=timeout) + builder = autotvm.LocalBuilder( + n_parallel=n_parallel, build_func=build_func) + runner = autotvm.LocalRunner( + number=min_exec_num, repeat=1, timeout=timeout) if use_rpc: if device_key is None: - raise RuntimeError("device_key need to be set to use rpc tracker mode.") + raise RuntimeError( + "device_key need to be set to use rpc tracker mode.") runner = autotvm.measure.RPCRunner(device_key, host, port, n_parallel=n_parallel, number=min_exec_num, repeat=1, timeout=timeout) measure_option = autotvm.measure_option(builder=builder, runner=runner) for args in args_list: data, in_layout, out_layout = args - ltf_workload = autotvm.task.args_to_workload(args, 'layout_transform') + ltf_workload = autotvm.task.args_to_workload( + args, 'layout_transform') if ltf_workload in self._layout_transform_perf_records: continue @@ -472,10 +489,12 @@ def _callback(_, inputs, results): else: inferred_time = flops * avg_time - record_input = MeasureInput(target=self._target, task=None, config=None) + record_input = MeasureInput( + target=self._target, task=None, config=None) record_output = MeasureResult(costs=(inferred_time,), error_no=0, all_cost=-1, timestamp=-1) - self._layout_transform_perf_records[ltf_workload] = (record_input, record_output) + self._layout_transform_perf_records[ltf_workload] = ( + record_input, record_output) continue records = [] @@ -485,7 +504,8 @@ def _callback(_, inputs, results): tuner.tune(n_trial=1, measure_option=measure_option, callbacks=[_log_to_list(records)]) if not isinstance(records[0][1].costs[0], float): - records[0] = (records[0][0], records[0][1]._replace(costs=(INVALID_LAYOUT_TIME,))) + 
records[0] = (records[0][0], records[0] + [1]._replace(costs=(INVALID_LAYOUT_TIME,))) self._layout_transform_perf_records[ltf_workload] = records[0] self._iterate_layout_transform(self._create_matrix_callback) @@ -502,7 +522,6 @@ def layout_transform_perf_records(self): """ return self._layout_transform_perf_records - def get_optimal_records(self): """Convert optimal record dictionary to a list of records with ascending order of node index in graph. @@ -518,7 +537,8 @@ def get_optimal_records(self): node_entry = self._node_list[index] if node_entry["op"] not in self._target_ops: continue - ret.append(node_entry["record_candidates"][self._optimal_record_dict[index]]) + ret.append(node_entry["record_candidates"] + [self._optimal_record_dict[index]]) return ret def write_opt_sch2record_file(self, record_file="graph_opt_schedule.log"): diff --git a/python/tvm/autotvm/measure/measure_methods.py b/python/tvm/autotvm/measure/measure_methods.py index db955ff64102..9c22b641cbf9 100644 --- a/python/tvm/autotvm/measure/measure_methods.py +++ b/python/tvm/autotvm/measure/measure_methods.py @@ -35,7 +35,8 @@ import tvm._ffi import tvm.ir.transform -from tvm import nd, rpc as _rpc, target as _target +from tvm import nd, rpc as _rpc +from tvm.target import Target from tvm.error import TVMError from tvm.driver import build from tvm.contrib import nvcc, ndk, tar @@ -49,6 +50,7 @@ logger = logging.getLogger('autotvm') + class BuildResult(namedtuple("BuildResult", ('filename', 'arg_info', 'error', 'time_cost'))): """ Stores all the necessary inputs for a measurement. @@ -65,6 +67,7 @@ class BuildResult(namedtuple("BuildResult", ('filename', 'arg_info', 'error', 't The time cost of building """ + class LocalBuilder(Builder): """Run compilation on local machine @@ -79,6 +82,7 @@ class LocalBuilder(Builder): If is 'ndk', use function for android ndk If is callable, use it as custom build function, expect lib_format field. """ + def __init__(self, timeout=10, n_parallel=None, build_func='default'): super(LocalBuilder, self).__init__(timeout, n_parallel) @@ -187,6 +191,7 @@ class RPCRunner(Runner): To make this option effective, the argument `number` should also be set to 1. This is only has effect on CPU task. 
""" + def __init__(self, key, host, port, priority=1, timeout=10, n_parallel=None, @@ -226,7 +231,7 @@ def set_task(self, task): if self.check_correctness: # use llvm cpu to generate a reference input/output # this option works for tuning topi, but might not work for you custom op - with _target.create("llvm"): + with Target("llvm"): s, arg_bufs = task.instantiate(task.config_space.get(0)) self.ref_input = [np.random.uniform(size=get_const_tuple(x.shape)).astype(x.dtype) for x in arg_bufs] @@ -251,15 +256,18 @@ def get_build_kwargs(self): } if 'cuda' in self.task.target.keys: - kwargs["cuda_arch"] = "sm_" + "".join(ctx.compute_version.split('.')) + kwargs["cuda_arch"] = "sm_" + \ + "".join(ctx.compute_version.split('.')) if self.task.target.device_name == 'micro_dev': - kwargs.setdefault('build_option', {})['tir.disable_vectorize'] = True + kwargs.setdefault('build_option', {})[ + 'tir.disable_vectorize'] = True return kwargs def run(self, measure_inputs, build_results): results = [] - remote_args = (self.key, self.host, self.port, self.priority, self.timeout) + remote_args = (self.key, self.host, self.port, + self.priority, self.timeout) for i in range(0, len(measure_inputs), self.n_parallel): futures = [] @@ -288,6 +296,7 @@ def run(self, measure_inputs, build_results): return results + class LocalRunner(RPCRunner): """Run generated code on local devices. @@ -328,6 +337,7 @@ class LocalRunner(RPCRunner): This is a "fake" local mode. We start a silent rpc tracker and rpc server for the user. In this way we reuse timeout/isolation mechanism in RPC infrastructure. """ + def __init__(self, timeout=10, number=4, repeat=3, min_repeat_ms=0, cooldown_interval=0.1, @@ -380,7 +390,7 @@ def _build_func_common(measure_input, check_gpu=None, cuda_arch=None, build_opti # if target is vta, we need to use vta build if hasattr(measure_input.target, 'device_name') and \ - measure_input.target.device_name == 'vta': + measure_input.target.device_name == 'vta': # pylint: disable=import-outside-toplevel import vta func = vta.build(s, args, target_host=task.target_host) @@ -407,9 +417,11 @@ class _WrappedBuildFunc(): wrapped_build_func : callable The wrapped build function """ + def __init__(self, build_func): if not hasattr(build_func, "output_format"): - raise AttributeError("Expect build_func to have the attribute output_format.") + raise AttributeError( + "Expect build_func to have the attribute output_format.") self.build_func = build_func def __call__(self, measure_input, tmp_dir, **kwargs): @@ -435,6 +447,7 @@ def __call__(self, measure_input, tmp_dir, **kwargs): return BuildResult(None, None, e, time.time() - tic) return BuildResult(filename, arg_info, None, time.time() - tic) + def run_through_rpc(measure_input, build_result, number, repeat, min_repeat_ms, cooldown_interval, remote_args, ref_input=None, ref_output=None, @@ -488,7 +501,7 @@ def run_through_rpc(measure_input, build_result, remote = request_remote(*remote_args) # Program the FPGA every single time when targeting VTA if hasattr(measure_input.target, 'device_name') and \ - measure_input.target.device_name == 'vta': + measure_input.target.device_name == 'vta': # pylint: disable=import-outside-toplevel from vta import program_fpga, reconfig_runtime program_fpga(remote, None) @@ -630,7 +643,8 @@ def tvm_callback_cuda_compile(code): # "-gencode", "arch=compute_70,code=sm_70" # ] target = "fatbin" if isinstance(curr_cuda_target_arch, list) else "ptx" - ptx = nvcc.compile_cuda(code, target=target, 
arch=AutotvmGlobalScope.current.cuda_target_arch) + ptx = nvcc.compile_cuda(code, target=target, + arch=AutotvmGlobalScope.current.cuda_target_arch) return ptx diff --git a/python/tvm/autotvm/record.py b/python/tvm/autotvm/record.py index 5a61c344f72b..b96e9bd8b39d 100644 --- a/python/tvm/autotvm/record.py +++ b/python/tvm/autotvm/record.py @@ -30,7 +30,8 @@ from collections import OrderedDict import numpy as np -from .. import build, lower, target as _target +from .. import build, lower +from ..target import Target from .. import __version__ from . import task from .task import ConfigEntity, ApplyHistoryBest @@ -142,16 +143,18 @@ def decode(row, protocol='json'): row = json.loads(row) if 'v' in row and row['v'] == 0.1: if _old_version_warning: - logger.warning("AutoTVM log version 0.1 is no longer supported.") + logger.warning( + "AutoTVM log version 0.1 is no longer supported.") _old_version_warning = False return None tgt, task_name, task_args, task_kwargs = row["input"] tgt = str(tgt) if "-target" in tgt: - logger.warning("\"-target\" is deprecated, use \"-mtriple\" instead.") + logger.warning( + "\"-target\" is deprecated, use \"-mtriple\" instead.") tgt = tgt.replace("-target", "-mtriple") - tgt = _target.create(str(tgt)) + tgt = Target(str(tgt)) def clean_json_to_python(x): """1. Convert all list in x to tuple (hashable) @@ -165,10 +168,12 @@ def clean_json_to_python(x): return int(x) return x - tsk = task.Task(clean_json_to_python(task_name), clean_json_to_python(task_args)) + tsk = task.Task(clean_json_to_python(task_name), + clean_json_to_python(task_args)) config = ConfigEntity.from_json_dict(row["config"]) inp = MeasureInput(tgt, tsk, config) - result = MeasureResult(*[tuple(x) if isinstance(x, list) else x for x in row["result"]]) + result = MeasureResult( + *[tuple(x) if isinstance(x, list) else x for x in row["result"]]) config.cost = np.mean(result.costs) return inp, result @@ -176,13 +181,15 @@ def clean_json_to_python(x): items = row.split("\t") if len(items) == 4: if _old_version_warning: - logger.warning("AutoTVM log version 0.1 is no longer supported.") + logger.warning( + "AutoTVM log version 0.1 is no longer supported.") _old_version_warning = False return None - tgt = _target.create(items[0]) + tgt = Target(items[0]) task_tuple = pickle.loads(base64.b64decode(items[1].encode())) config = pickle.loads(base64.b64decode(items[2].encode())) - result = MeasureResult(*pickle.loads(base64.b64decode(items[3].encode()))) + result = MeasureResult( + *pickle.loads(base64.b64decode(items[3].encode()))) config.cost = np.mean(result.costs) tsk = task.Task(task_tuple[0], task_tuple[1]) @@ -251,7 +258,8 @@ def split_workload(in_file, clean=True): cleaned.append([inp, res]) # write to file - logger.info("Key: %s\tValid: %d\tDup: %d\t", k, len(cleaned), len(v) - len(cleaned)) + logger.info("Key: %s\tValid: %d\tDup: %d\t", k, + len(cleaned), len(v) - len(cleaned)) with open(args.i + ".%03d.wkl" % i, 'w') as fout: for inp, res in cleaned: fout.write(encode(inp, res) + '\n') @@ -262,6 +270,7 @@ def split_workload(in_file, clean=True): for inp, res in v: fout.write(encode(inp, res) + '\n') + def pick_best(in_file, out_file): """ Pick best entries from a file and store it to another file. @@ -298,6 +307,7 @@ def pick_best(in_file, out_file): fout.write(encode(inp, res) + "\n") best_set.remove(measure_str_key(inp)) + """ Usage: This record executable module has three modes. 
@@ -313,7 +323,8 @@ def pick_best(in_file, out_file): """ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--mode", choices=['read', 'pick', 'split'], default='read') + parser.add_argument( + "--mode", choices=['read', 'pick', 'split'], default='read') parser.add_argument("--i", type=str, help="input file") parser.add_argument("--o", type=str, default=None, help='output file') parser.add_argument("--begin", type=int, default=0) diff --git a/python/tvm/autotvm/task/dispatcher.py b/python/tvm/autotvm/task/dispatcher.py index 736b5f3eb8f4..ebb19b04809b 100644 --- a/python/tvm/autotvm/task/dispatcher.py +++ b/python/tvm/autotvm/task/dispatcher.py @@ -160,6 +160,7 @@ class ApplyConfig(DispatchContext): config : ConfigSpace or ConfigEntity The specific configuration we care about. """ + def __init__(self, config): super(ApplyConfig, self).__init__() self._config = config @@ -187,6 +188,7 @@ class ApplyHistoryBest(DispatchContext): If is str, then it should be the filename of a records log file. Each row of this file is an encoded record pair. Otherwise, it is an iterator. """ + def __init__(self, records): super(ApplyHistoryBest, self).__init__() @@ -253,7 +255,7 @@ def load(self, records): def _query_inside(self, target, workload): if target is None: raise RuntimeError("Need a target context to find the history best. " - "Hint: If your target is llvm, use `with tvm.target.create('llvm'):`" + "Hint: If your target is llvm, use `with tvm.target.Target('llvm'):`" " above the dispatcher call. So does other target. ") # first try matching by model @@ -306,7 +308,8 @@ def _query_inside(self, target, workload): if not _env.GLOBAL_SCOPE.silent: msg = "Cannot find config for target=%s, workload=%s. A fallback configuration "\ - "is used, which may bring great performance regression." % (target, workload) + "is used, which may bring great performance regression." % ( + target, workload) if msg not in DispatchContext.warning_messages: DispatchContext.warning_messages.add(msg) logger.warning(msg) @@ -370,6 +373,7 @@ class ApplyGraphBest(DispatchContext): This context maintains an internal counter to indicate the current node index. """ + def __init__(self, records): """ Parameters diff --git a/python/tvm/autotvm/task/task.py b/python/tvm/autotvm/task/task.py index 3942599e2cb1..c4b1d348c832 100644 --- a/python/tvm/autotvm/task/task.py +++ b/python/tvm/autotvm/task/task.py @@ -23,7 +23,7 @@ """ import numpy as np -from tvm import target as _target +from tvm.target import Target from tvm import runtime from tvm.ir import container from tvm.tir import expr @@ -34,6 +34,7 @@ from .dispatcher import DispatchContext, ApplyConfig from .space import ConfigSpace + def _raise_error(*args, **kwargs): # pylint: disable=unused-argument raise RuntimeError("The function of this task is not found. Possibly the function " "of this task is registered in another python file " @@ -115,6 +116,7 @@ class Task(object): args: Tuple Positional argument of func """ + def __init__(self, name, args): self.name = name self.args = args @@ -187,8 +189,10 @@ def __repr__(self): self.name, self.args, self.kwargs, self.workload ) + TASK_TABLE = {} + class TaskTemplate(object): """ Task template is used to creates a tunable AutoTVM task. 
@@ -201,6 +205,7 @@ class TaskTemplate(object): Note that when customized func is registered, compute and schedule function will be ignored """ + def __init__(self): self.fcompute = None self.fschedule = None @@ -230,11 +235,13 @@ def _get_inputs(out): if isinstance(t.op, tensor.PlaceholderOp): inputs.append(t) else: - input_tensors = [t for t in t.op.input_tensors if t not in hash_set] + input_tensors = [ + t for t in t.op.input_tensors if t not in hash_set] queue.extend(input_tensors) hash_set.update(input_tensors) return inputs + def _register_task_compute(name, func=None): """Register compute function to autotvm task @@ -257,13 +264,15 @@ def _do_reg(f): TASK_TABLE[name] = TaskTemplate() tmpl = TASK_TABLE[name] if tmpl.fcompute is not None: - raise ValueError("Compute is already registered in autoTVM task %s" % name) + raise ValueError( + "Compute is already registered in autoTVM task %s" % name) tmpl.fcompute = f return f if func: return _do_reg(func) return _do_reg + def _register_task_schedule(name, func=None): """Register schedule function to autotvm task @@ -286,13 +295,15 @@ def _do_reg(f): TASK_TABLE[name] = TaskTemplate() tmpl = TASK_TABLE[name] if tmpl.fschedule is not None: - raise ValueError("Schedule is already registered in autoTVM task %s" % name) + raise ValueError( + "Schedule is already registered in autoTVM task %s" % name) tmpl.fschedule = f return f if func: return _do_reg(func) return _do_reg + def _register_customized_task(name, func=None): """Register a customized function to AutoTVM task. @@ -315,7 +326,8 @@ def _do_reg(f): TASK_TABLE[name] = TaskTemplate() tmpl = TASK_TABLE[name] if tmpl.fcustomized is not None: - raise ValueError("Customized func is already registered in autoTVM task %s" % name) + raise ValueError( + "Customized func is already registered in autoTVM task %s" % name) tmpl.fcustomized = f return f if func: @@ -378,7 +390,7 @@ def _decorate(f): def wrapper(*args, **kwargs): assert not kwargs, "Do not support kwargs in template function call" workload = args_to_workload(args, task_name) - tgt = _target.Target.current() + tgt = Target.current() cfg = DispatchContext.current.query(tgt, workload) with ApplyConfig(cfg): return f(*args, **kwargs) @@ -414,7 +426,7 @@ def create(task_name, args, target, target_host=None): ret = Task(task_name, args) if isinstance(target, str): - target = _target.create(target) + target = Target(target) # init config space ret.config_space = ConfigSpace() @@ -431,6 +443,7 @@ def create(task_name, args, target, target_host=None): return ret + def get_config(): """Get current config object @@ -439,9 +452,10 @@ def get_config(): cfg: ConfigSpace or ConfigEntity The current config """ - tgt = _target.Target.current(allow_none=True) + tgt = Target.current(allow_none=True) return DispatchContext.current.query(tgt, None) + class FlopCalculationError(RuntimeError): """Error happens when estimating FLOP for a compute op""" @@ -462,7 +476,8 @@ def compute_flop(sch): def _prod_length(axes): """compute product of the lengths of a list of axes""" try: - num_iter = int(np.prod([get_const_int(axis.dom.extent) for axis in axes])) + num_iter = int( + np.prod([get_const_int(axis.dom.extent) for axis in axes])) except ValueError: raise FlopCalculationError("The length of axis is not constant. 
") return num_iter @@ -474,9 +489,11 @@ def _count_flop(exp): combiner = exp.combiner.result source = exp.source if len(combiner) != 1: - raise FlopCalculationError("Found multiple output in the combiner of reduce op") + raise FlopCalculationError( + "Found multiple output in the combiner of reduce op") if len(source) != 1: - raise FlopCalculationError("Found multiple output in the source of reduce op") + raise FlopCalculationError( + "Found multiple output in the source of reduce op") return num_iter * (_count_flop(combiner[0]) + _count_flop(source[0])) if isinstance(exp, (expr.FloatImm, expr.IntImm)): return 0 @@ -506,7 +523,8 @@ def _count_flop(exp): if isinstance(exp, expr.Call): return sum([_count_flop(x) for x in exp.args]) - raise FlopCalculationError("Found unsupported operator in the compute expr") + raise FlopCalculationError( + "Found unsupported operator in the compute expr") def traverse(ops): """accumulate flops""" @@ -517,7 +535,8 @@ def traverse(ops): body = op.body if len(body) != 1: - raise FlopCalculationError("Found multiple output in the compute") + raise FlopCalculationError( + "Found multiple output in the compute") exp = body[0] ret += num_element * _count_flop(exp) @@ -534,8 +553,9 @@ def traverse(ops): ret = traverse(sch.outputs) except FlopCalculationError as exc: raise RuntimeError("FLOP estimator fails for this operator. Error msg: " - + str(exc) + ". Please use `cfg.add_flop` to manually set " - "FLOP for this operator") + + str(exc) + + ". Please use `cfg.add_flop` to manually set " + "FLOP for this operator") if ret == 0: raise RuntimeError("Cannot find float number operation in this operator. " diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 59e77f7d0098..cc0170f9b1bb 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -27,7 +27,7 @@ See tvm/topi/python/topi/arm_cpu/depthwise_conv2d.py for example usage. """ import tvm.te._ffi_api -from tvm import target as _target +from tvm.target import Target from tvm.te import tensor from .task import args_to_workload, serialize_args, DispatchContext, \ @@ -150,7 +150,7 @@ def wrapper(*args, **kwargs): if task_env is not None and task_env.tracing: task_env.add_task(task_name, args) workload = args_to_workload(args, task_name) - tgt = _target.Target.current() + tgt = Target.current() cfg = DispatchContext.current.query(tgt, workload) node = topi_compute(cfg, *args) @@ -217,8 +217,9 @@ def wrapper(outs, *args, **kwargs): """wrapper function for topi schedule""" workload = get_workload(outs) if workload is None: - raise RuntimeError("Cannot find workload in attribute of this schedule") - tgt = _target.Target.current() + raise RuntimeError( + "Cannot find workload in attribute of this schedule") + tgt = Target.current() cfg = DispatchContext.current.query(tgt, workload) return topi_schedule(cfg, outs, *args, **kwargs) return wrapper diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index 063932dee582..e637e296dd61 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -27,7 +27,7 @@ import sys from .task import ApplyHistoryBest -from .. 
import target as _target +from ..target import Target from ..contrib.download import download from .record import load_from_file from .util import EmptyContext @@ -42,7 +42,8 @@ AUTOTVM_TOPHUB_NONE_LOC = "NONE" # root path to store TopHub files -AUTOTVM_TOPHUB_ROOT_PATH = os.path.join(os.path.expanduser('~'), ".tvm", "tophub") +AUTOTVM_TOPHUB_ROOT_PATH = os.path.join( + os.path.expanduser('~'), ".tvm", "tophub") # the version of each package PACKAGE_VERSION = { @@ -60,6 +61,7 @@ logger = logging.getLogger('autotvm') + def _alias(name): """convert alias for some packages""" table = { @@ -73,10 +75,12 @@ def _alias(name): } return table.get(name, name) + def _get_tophub_location(): location = os.getenv(AUTOTVM_TOPHUB_LOC_VAR, None) return AUTOTVM_TOPHUB_DEFAULT_LOC if location is None else location + def context(target, extra_files=None): """Return the dispatch context with pre-tuned parameters. This function will load the corresponding *.log files in AUTOTVM_TOPHUB_ROOT_PATH. @@ -100,7 +104,7 @@ def context(target, extra_files=None): for tgt in targets: if isinstance(tgt, str): - tgt = _target.create(tgt) + tgt = Target(tgt) possible_names = [] device = tgt.attrs.get("device", "") @@ -116,7 +120,8 @@ def context(target, extra_files=None): continue filename = "%s_%s.log" % (name, PACKAGE_VERSION[name]) - best_context.load(os.path.join(AUTOTVM_TOPHUB_ROOT_PATH, filename)) + best_context.load(os.path.join( + AUTOTVM_TOPHUB_ROOT_PATH, filename)) break # only load one file to avoid some fallback template mismatch problem if extra_files: @@ -157,7 +162,8 @@ def check_backend(tophub_location, backend): download_package(tophub_location, package_name) return True except urllib2.URLError as e: - logging.warning("Failed to download tophub package for %s: %s", backend, e) + logging.warning( + "Failed to download tophub package for %s: %s", backend, e) return False @@ -184,12 +190,14 @@ def download_package(tophub_location, package_name): download_url = "{0}/{1}".format(tophub_location, package_name) logger.info("Download pre-tuned parameters package from %s", download_url) - download(download_url, os.path.join(rootpath, package_name), True, verbose=0) + download(download_url, os.path.join( + rootpath, package_name), True, verbose=0) # global cache for load_reference_log REFERENCE_LOG_CACHE = {} + def load_reference_log(backend, model, workload_name): """ Load reference log from TopHub to support fallback in template. Template will use these reference logs to choose fallback config. @@ -220,7 +228,7 @@ def load_reference_log(backend, model, workload_name): tophub_location = _get_tophub_location() if tophub_location != AUTOTVM_TOPHUB_NONE_LOC: download_package(tophub_location, package_name) - if os.path.isfile(filename): # in case download failed + if os.path.isfile(filename): # in case download failed find = False inp = None counts = {} diff --git a/python/tvm/contrib/peak.py b/python/tvm/contrib/peak.py index 2906410efc40..98a51b803c00 100644 --- a/python/tvm/contrib/peak.py +++ b/python/tvm/contrib/peak.py @@ -23,6 +23,7 @@ from . import util from .. 
import rpc + def _convert_to_remote(func, remote): """ convert module function to remote rpc function""" temp = util.tempdir() @@ -33,6 +34,7 @@ def _convert_to_remote(func, remote): func = remote.load_module("tmp_func.tar") return func + def measure_bandwidth_sum(total_item, item_per_thread, stride, base_type, bits, lanes, target, target_host, remote, ctx, n_times): @@ -83,7 +85,8 @@ def measure_bandwidth_sum(total_item, item_per_thread, stride, k = te.reduce_axis((0, m), name="k") x = te.placeholder((n,), dtype=dtype, name="x") - op = te.comm_reducer(lambda x, y: x*y, lambda t: tvm.tir.const(1, dtype=t), name="sum") + op = te.comm_reducer( + lambda x, y: x*y, lambda t: tvm.tir.const(1, dtype=t), name="sum") y = te.compute((n // m,), lambda i: op(x[i // stride * stride * m + i % stride + k * stride], axis=k)) s = te.create_schedule(y.op) @@ -108,6 +111,7 @@ def measure_bandwidth_sum(total_item, item_per_thread, stride, return 1.0 * (total_item * bits / 8) / 1e9 / time + def measure_bandwidth_all_types(total_item, item_per_thread, n_times, target, target_host, remote, ctx, verbose=True): """ measure memory bandwidth for all types @@ -152,9 +156,11 @@ def measure_bandwidth_all_types(total_item, item_per_thread, n_times, type_name = base_type + str(bits) result.append(["%sx%d" % (type_name, lanes), max_speed]) if verbose: - logging.info("\t%-10s %.2f GBPS", result[-1][0], result[-1][1]) + logging.info("\t%-10s %.2f GBPS", + result[-1][0], result[-1][1]) return result + def measure_compute_mad(total_item, item_per_thread, base_type, bits, lanes, target, target_host, remote, ctx, n_times): """ measure peak compute speed by computing mad for a type @@ -225,9 +231,11 @@ def extern(ins, outs): b[0] = outs[0].vload(idx, dtype) if base_type.find('float') != -1: - mad_func = lambda x, y: (x * x + y) + def mad_func(x, y): + return x * x + y else: - mad_func = lambda x, y: y * y + x + def mad_func(x, y): + return y * y + x for _ in range(item_per_thread // 4 // lanes): a[0] = mad_func(a[0], b[0]) @@ -251,6 +259,7 @@ def extern(ins, outs): return 1.0 * (n * item_per_thread) / 1e9 / time + def measure_compute_all_types(total_item, item_per_thread, n_times, target, target_host, remote, ctx, verbose=True): """ measure peak flops for all types @@ -298,7 +307,8 @@ def measure_compute_all_types(total_item, item_per_thread, n_times, unit = "GFLOPS" if base_type == "float" else "GIOPS" if verbose: - logging.info("\t%-10s %.2f %s", result[-1][0], result[-1][1], unit) + logging.info("\t%-10s %.2f %s", + result[-1][0], result[-1][1], unit) return result @@ -314,7 +324,7 @@ def measure_peak_all(target, target_host, host, port): port: int """ - target = tvm.target.create(target) + target = tvm.target.Target(target) remote = rpc.connect(host, port) n_times = 20 diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py index 9a3c473737c3..e24e799ee37b 100644 --- a/python/tvm/driver/build_module.py +++ b/python/tvm/driver/build_module.py @@ -29,7 +29,7 @@ from tvm.target import codegen from tvm.te import tensor from tvm.te import schedule -from tvm import target as _target +from tvm.target import Target def get_binds(args, compact=False, binds=None): @@ -239,8 +239,8 @@ def _build_for_device(input_mod, target, target_host): mdev : tvm.module A module that contains device code. 
""" - target = _target.create(target) - target_host = _target.create(target_host) + target = Target(target) + target_host = Target(target_host) device_type = ndarray.context(target.kind.name, 0).device_type mod_mixed = input_mod @@ -391,23 +391,23 @@ def build(inputs, "inputs must be Schedule, IRModule or dict of target to IRModule") if not isinstance(inputs, (dict, container.Map)): - target = _target.Target.current() if target is None else target + target = Target.current() if target is None else target target = target if target else "llvm" target_input_mod = {target: input_mod} else: target_input_mod = inputs for tar, mod in target_input_mod.items(): - if not isinstance(tar, (str, _target.Target)): + if not isinstance(tar, (str, Target)): raise ValueError("The key of inputs must be str or " - "_target.Target when inputs is dict.") + "Target when inputs is dict.") if not isinstance(mod, tvm.IRModule): raise ValueError("inputs must be Schedule, IRModule," "or dict of str to IRModule.") if not target_host: for tar, _ in target_input_mod.items(): - tar = _target.create(tar) + tar = Target(tar) device_type = ndarray.context(tar.kind.name, 0).device_type if device_type == ndarray.cpu(0).device_type: target_host = tar diff --git a/python/tvm/relay/backend/compile_engine.py b/python/tvm/relay/backend/compile_engine.py index d6508a6f61b7..41be47b78fd0 100644 --- a/python/tvm/relay/backend/compile_engine.py +++ b/python/tvm/relay/backend/compile_engine.py @@ -24,7 +24,7 @@ from tvm import te from tvm.runtime import Object from tvm.support import libinfo -from ... import target as _target +from ...target import Target from ... import autotvm from .. import function as _function from .. import ty as _ty @@ -33,9 +33,11 @@ logger = logging.getLogger('compile_engine') autotvm_logger = logging.getLogger('autotvm') + @tvm._ffi.register_object("relay.LoweredOutput") class LoweredOutput(Object): """Lowered output""" + def __init__(self, outputs, implement): self.__init_handle_by_constructor__( _backend._make_LoweredOutput, outputs, implement) @@ -53,6 +55,7 @@ class CCacheKey(Object): target : tvm.Target The target we want to run the function on. """ + def __init__(self, source_func, target): self.__init_handle_by_constructor__( _backend._make_CCacheKey, source_func, target) @@ -67,7 +70,7 @@ class CCacheValue(Object): def _get_cache_key(source_func, target): if isinstance(source_func, _function.Function): if isinstance(target, str): - target = _target.create(target) + target = Target(target) if not target: raise ValueError("Need target when source_func is a Function") return CCacheKey(source_func, target) @@ -263,7 +266,8 @@ def lower_call(call, inputs, target): new_fields = [] for field in ret_type.fields: if isinstance(field, _ty.TensorType): - new_fields.append(_ty.TensorType(get_shape(field.shape), field.dtype)) + new_fields.append(_ty.TensorType( + get_shape(field.shape), field.dtype)) else: new_fields.append(field) ret_type = _ty.TupleType(new_fields) @@ -299,6 +303,7 @@ def lower_call(call, inputs, target): class CompileEngine(Object): """CompileEngine to get lowered code. 
""" + def __init__(self): raise RuntimeError("Cannot construct a CompileEngine") diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_runtime_codegen.py index 8210f27732be..81ab4cb4de25 100644 --- a/python/tvm/relay/backend/graph_runtime_codegen.py +++ b/python/tvm/relay/backend/graph_runtime_codegen.py @@ -35,9 +35,10 @@ """ from tvm.runtime.ndarray import empty from tvm.relay import _build_module -from tvm import target as _target +from tvm.target import Target from tvm.tir import expr as _expr + class GraphRuntimeCodegen(object): """The compiler from Relay to the TVM runtime system.""" @@ -55,11 +56,11 @@ def _setup(self, mod, target): tgts = {} if isinstance(target, dict): for dev, tgt in target.items(): - if not isinstance(tgt, (str, _target.Target)): + if not isinstance(tgt, (str, Target)): raise Exception("Unknown target type") - tgts[dev] = _target.create(tgt) - elif isinstance(target, (str, _target.Target)): - tgts[_expr.IntImm("int32", 0)] = _target.create(target) + tgts[dev] = Target(tgt) + elif isinstance(target, (str, Target)): + tgts[_expr.IntImm("int32", 0)] = Target(target) self._init(mod, tgts) def codegen(self, func): diff --git a/python/tvm/relay/backend/vm.py b/python/tvm/relay/backend/vm.py index 656652c23004..e1de326fb84a 100644 --- a/python/tvm/relay/backend/vm.py +++ b/python/tvm/relay/backend/vm.py @@ -188,12 +188,14 @@ def _update_target(self, target): raise ValueError("Target is not set in env or passed as argument.") tgts = {} if isinstance(target, (str, tvm.target.Target)): - dev_type = tvm.tir.IntImm("int32", tvm.nd.context(str(target)).device_type) - tgts[dev_type] = tvm.target.create(target) + dev_type = tvm.tir.IntImm( + "int32", tvm.nd.context(str(target)).device_type) + tgts[dev_type] = tvm.target.Target(target) elif isinstance(target, dict): for dev, tgt in target.items(): - dev_type = tvm.tir.IntImm("int32", tvm.nd.context(dev).device_type) - tgts[dev_type] = tvm.target.create(tgt) + dev_type = tvm.tir.IntImm( + "int32", tvm.nd.context(dev).device_type) + tgts[dev_type] = tvm.target.Target(tgt) else: raise TypeError("target is expected to be str, tvm.target.Target, " + "or dict of str to str/tvm.target.Target, but received " + @@ -211,7 +213,7 @@ def _update_target_host(self, target, target_host): if not target_host: target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm" if isinstance(target_host, str): - target_host = tvm.target.create(target_host) + target_host = tvm.target.Target(target_host) return target_host def _tophub_context(self, target): diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index 2f285efc8aa2..f77988e22ebc 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -24,7 +24,8 @@ from tvm.ir import IRModule from tvm.tir import expr as tvm_expr -from .. import nd as _nd, target as _target, autotvm +from .. import nd as _nd, autotvm +from ..target import Target from ..contrib import graph_runtime as _graph_rt from . import _build_module from . 
import ty as _ty @@ -34,19 +35,21 @@ from .backend import interpreter as _interpreter from .backend.vm import VMExecutor + def _update_target(target): - target = target if target else _target.Target.current() + target = target if target else Target.current() if target is None: raise ValueError("Target is not set in env or passed as argument.") tgts = {} - if isinstance(target, (str, _target.Target)): - dev_type = tvm_expr.IntImm("int32", _nd.context(str(target)).device_type) - tgts[dev_type] = _target.create(target) + if isinstance(target, (str, Target)): + dev_type = tvm_expr.IntImm( + "int32", _nd.context(str(target)).device_type) + tgts[dev_type] = Target(target) elif isinstance(target, dict): for dev, tgt in target.items(): dev_type = tvm_expr.IntImm("int32", _nd.context(dev).device_type) - tgts[dev_type] = _target.create(tgt) + tgts[dev_type] = Target(tgt) else: raise TypeError("target is expected to be str or " + "tvm.target.Target, but received " + @@ -67,6 +70,7 @@ class BuildModule(object): """Build an IR module to run on TVM graph runtime. This class is used to expose the `RelayBuildModule` APIs implemented in C++. """ + def __init__(self): self.mod = _build_module._BuildModule() self._get_graph_json = self.mod["get_graph_json"] @@ -161,7 +165,6 @@ def optimize(self, mod, target=None, params=None): return mod, params - def _set_params(self, params): self._set_params_func(_convert_param_map(params)) @@ -237,8 +240,8 @@ def build(mod, target=None, target_host=None, params=None, mod_name='default'): target = _update_target(target) - if isinstance(target_host, (str, _target.Target)): - target_host = _target.create(target_host) + if isinstance(target_host, (str, Target)): + target_host = Target(target_host) elif target_host: raise ValueError("target host must be the type of str, " + "tvm.target.Target, or None") @@ -252,8 +255,10 @@ def build(mod, target=None, target_host=None, params=None, mod_name='default'): with tophub_context: bld_mod = BuildModule() - graph_json, mod, params = bld_mod.build(mod, target, target_host, params) - mod = _graph_runtime_factory.GraphRuntimeFactoryModule(graph_json, mod, mod_name, params) + graph_json, mod, params = bld_mod.build( + mod, target, target_host, params) + mod = _graph_runtime_factory.GraphRuntimeFactoryModule( + graph_json, mod, mod_name, params) return mod @@ -362,7 +367,8 @@ def _make_executor(self, expr=None): if _ty.is_dynamic(ret_type): raise ValueError("Graph Runtime only supports static graphs, got output type", ret_type) - num_outputs = len(ret_type.fields) if isinstance(ret_type, _ty.TupleType) else 1 + num_outputs = len(ret_type.fields) if isinstance( + ret_type, _ty.TupleType) else 1 mod = build(self.mod, target=self.target) gmodule = _graph_rt.GraphModule(mod['default'](self.ctx)) @@ -412,7 +418,7 @@ def create_executor(kind="debug", ctx = _nd.context(str(target), 0) if isinstance(target, str): - target = _target.create(target) + target = Target(target) if kind == "debug": return _interpreter.Interpreter(mod, ctx, target) if kind == "graph": diff --git a/python/tvm/relay/testing/py_converter.py b/python/tvm/relay/testing/py_converter.py index 351f15364966..a782d8324c23 100644 --- a/python/tvm/relay/testing/py_converter.py +++ b/python/tvm/relay/testing/py_converter.py @@ -582,7 +582,7 @@ def visit_op(self, _): pass -def to_python(expr: Expr, mod=None, target=tvm.target.create('llvm')): +def to_python(expr: Expr, mod=None, target=tvm.target.Target('llvm')): """Converts the given Relay expression into a Python script (as a Python 
AST object). For easiest debugging, import the astor package and use to_source().""" mod = mod if mod is not None else tvm.IRModule() @@ -590,7 +590,7 @@ def to_python(expr: Expr, mod=None, target=tvm.target.create('llvm')): return converter.convert(expr) -def run_as_python(expr: Expr, mod=None, target=tvm.target.create('llvm')): +def run_as_python(expr: Expr, mod=None, target=tvm.target.Target('llvm')): """Converts the given Relay expression into a Python script and executes it.""" mod = mod if mod is not None else tvm.IRModule() diff --git a/python/tvm/target/__init__.py b/python/tvm/target/__init__.py index 09d7bd858070..a9a12bb158c5 100644 --- a/python/tvm/target/__init__.py +++ b/python/tvm/target/__init__.py @@ -51,11 +51,12 @@ It is useful in environments where dynamic loading api like dlopen is banned. The system lib will be available as long as the result code is linked by the program. -We can use :py:func:`tvm.target.create` to create a tvm.target.Target from the target string. +We can use :py:func:`tvm.target.Target` to create a tvm.target.Target from the target string. We can also use other specific function in this module to create specific targets. """ from .target import Target, create from .target import cuda, rocm, mali, intel_graphics, arm_cpu, rasp, vta, bifrost, hexagon +from .tag import list_tags from .generic_func import GenericFunc from .generic_func import generic_func, get_native_generic_func, override_native_generic_func from . import datatype diff --git a/python/tvm/target/codegen.py b/python/tvm/target/codegen.py index dc65c5b72d4d..79ef46c3f62a 100644 --- a/python/tvm/target/codegen.py +++ b/python/tvm/target/codegen.py @@ -17,7 +17,7 @@ # under the License. """Code generation related functions.""" from . import _ffi_api -from . import target as _tgt +from .target import Target def build_module(mod, target): @@ -36,7 +36,7 @@ def build_module(mod, target): module : runtime.Module The corressponding module. """ - target = _tgt.create(target) if isinstance(target, str) else target + target = Target(target) if isinstance(target, str) else target return _ffi_api.Build(mod, target) diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index a593a93662c6..6d205bc0447c 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -81,7 +81,7 @@ def _rule_float_suffix(op): See Also -------- - register_intrin_rule : The registeration function for intrin rule. + register_intrin_rule : The registration function for intrin rule. """ name = op.op.name assert name.startswith("tir.") @@ -112,12 +112,13 @@ def _rule_float_direct(op): See Also -------- - register_intrin_rule : The registeration function for intrin rule. + register_intrin_rule : The registration function for intrin rule. """ if str(op.dtype).startswith("float"): return call_pure_extern(op.dtype, op.op.name[4:], *op.args) return None + # opencl pattern for exp register_intrin_rule("opencl", "exp", _rule_float_direct, override=True) # default pattern for exp diff --git a/python/tvm/target/tag.py b/python/tvm/target/tag.py new file mode 100644 index 000000000000..c6a8f71fd8f8 --- /dev/null +++ b/python/tvm/target/tag.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Target tags""" +from typing import Any, Dict, Optional +from . import _ffi_api +from .target import Target + + +def list_tags() -> Optional[Dict[str, Target]]: + """Returns a dict of tags, which maps each tag name to its corresponding target. + + Returns + ------- + tag_dict : Optional[Dict[str, Target]] + The dict of tags mapping each tag name to to its corresponding target. + None if TVM is built in runtime-only mode. + """ + if hasattr(_ffi_api, "TargetTagListTags"): + return _ffi_api.TargetTagListTags() + return None + + +def register_tag(name: str, config: Dict[str, Any], override: bool = False) -> Optional[Target]: + """Add a user-defined tag into the target tag registry. + + Parameters + ---------- + name: str + Name of the target, e.g. "nvidia/gtx1080ti" + config : Dict[str, Any] + The config dict used to create the target + override: bool + A boolean flag indicating if overriding existing tags are allowed. + If False and the tag has been registered already, an exception will be thrown. + + Returns + ------- + target : Optional[Target] + The target corresponding to the tag + None if TVM is built in runtime-only mode. + + Examples + -------- + .. code-block:: python + + register_tag("nvidia/gtx1080ti", config={ + "kind": "cuda", + "arch": "sm_61", + }) + """ + if hasattr(_ffi_api, "TargetTagAddTag"): + return _ffi_api.TargetTagAddTag(name, config, override) + return None + + +# To check the correctness of all registered tags, the call is made in library loading time. +list_tags() + +# We purposely maintain all tags in the C++ side to support pure C++ use cases, +# and the Python API is only used for fast prototyping. +register_tag("nvidia/gtx1080ti", config={ + "kind": "cuda", + "arch": "sm_61", +}) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 986caa165791..54b354f15bb6 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -22,6 +22,7 @@ import tvm._ffi from tvm.runtime import Object +from tvm._ffi import register_func as _register_func from . import _ffi_api @@ -37,9 +38,8 @@ class Target(Object): Note ---- - Do not use class constructor, you can create target using the following functions + You can create target using the constructor or the following functions - - :py:func:`tvm.target.create` create target from string - :py:func:`tvm.target.arm_cpu` create arm_cpu target - :py:func:`tvm.target.cuda` create CUDA target - :py:func:`tvm.target.rocm` create ROCM target @@ -47,12 +47,58 @@ class Target(Object): - :py:func:`tvm.target.intel_graphics` create Intel Graphics target """ + def __init__(self, tag_or_str_or_dict): + """Construct a TVM target object from + 1) Raw target string + 2) Target config dict + 3) Target tag + + Parameters + ---------- + tag_or_str_or_dict : Union[str, Dict[str, Any]] + Can be one of a literal target string, a json string describing + a configuration, or a dictionary of configuration options. 
+ When using a dictionary or json string to configure target, the + possible values are: + + kind : str (required) + Which codegen path to use, for example 'llvm' or 'cuda'. + keys : List of str (optional) + A set of strategies that can be dispatched to. When using + "kind=opencl" for example, one could set keys to ["mali", "opencl", "gpu"]. + device : str (optional) + A single key that corresponds to the actual device being run on. + This will be effectively appended to the keys. + libs : List of str (optional) + The set of external libraries to use. For example ['cblas', 'mkl']. + system-lib : bool (optional) + If True, build a module that contains self registered functions. + Useful for environments where dynamic loading like dlopen is banned. + mcpu : str (optional) + The specific cpu being run on. Serves only as an annotation. + model : str (optional) + An annotation indicating what model a workload came from. + runtime : str (optional) + An annotation indicating which runtime to use with a workload. + mtriple : str (optional) + The llvm triplet describing the target, for example "arm64-linux-android". + mattr : List of str (optional) + The llvm features to compile with, for example ["+avx512f", "+mmx"]. + mfloat-abi : str (optional) + An llvm setting that is one of 'hard' or 'soft' indicating whether to use + hardware or software floating-point operations. + """ + if not isinstance(tag_or_str_or_dict, (dict, str, Target)): + raise ValueError("target has to be a string or dictionary.") + self.__init_handle_by_constructor__( + _ffi_api.Target, tag_or_str_or_dict) + def __enter__(self): - _ffi_api.EnterTargetScope(self) + _ffi_api.TargetEnterScope(self) return self def __exit__(self, ptype, value, trace): - _ffi_api.ExitTargetScope(self) + _ffi_api.TargetExitScope(self) def export(self): return _ffi_api.TargetExport(self) @@ -70,7 +116,7 @@ def current(allow_none=True): ------ ValueError if current target is not set. """ - return _ffi_api.GetCurrentTarget(allow_none) + return _ffi_api.TargetCurrent(allow_none) @property def max_num_threads(self): @@ -104,6 +150,8 @@ def libs(self): return list(self.attrs.get("libs", [])) +# TODO(@tvm-team): Deprecate the helper functions below. Encourage the usage of config dict instead. 
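For reference, the constructor documented above accepts any of the three forms, and entering the target scope makes it visible through Target.current(). A minimal sketch (the "nvidia/gtx1080ti" tag is the one registered in tag.py above; the mcpu/arch values are illustrative, and the printed max_num_threads assumes the default CUDA setting):

    from tvm.target import Target

    # 1) Raw target string; attributes use the "-key=value" syntax.
    t1 = Target("llvm -mcpu=skylake")

    # 2) Config dict; "kind" is required, and other keys must be valid
    #    attributes of that target kind.
    t2 = Target({"kind": "cuda", "arch": "sm_61"})

    # 3) Registered tag name (see the tag registry added in tag.py / tag.cc).
    t3 = Target("nvidia/gtx1080ti")

    assert t1.kind.name == "llvm"

    # Entering a target scope sets the implicit current target, which is how
    # TOPI schedules pick up attributes such as max_num_threads.
    with t2:
        assert Target.current().kind.name == "cuda"
        print(Target.current().max_num_threads)  # typically 1024 for CUDA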
+ def _merge_opts(opts, new_opts): """Helper function to merge options""" if isinstance(new_opts, str): @@ -126,7 +174,7 @@ def cuda(model='unknown', options=None): Additional options """ opts = _merge_opts(['-model=%s' % model], options) - return _ffi_api.TargetCreate("cuda", *opts) + return Target(" ".join(["cuda"] + opts)) def rocm(model='unknown', options=None): @@ -140,7 +188,7 @@ def rocm(model='unknown', options=None): Additional options """ opts = _merge_opts(["-model=%s" % model], options) - return _ffi_api.TargetCreate("rocm", *opts) + return Target(" ".join(["rocm"] + opts)) def mali(model='unknown', options=None): @@ -155,7 +203,7 @@ def mali(model='unknown', options=None): """ opts = ["-device=mali", '-model=%s' % model] opts = _merge_opts(opts, options) - return _ffi_api.TargetCreate("opencl", *opts) + return Target(" ".join(["opencl"] + opts)) def intel_graphics(model='unknown', options=None): @@ -171,7 +219,7 @@ def intel_graphics(model='unknown', options=None): opts = ["-device=intel_graphics", "-model=%s" % model, "-thread_warp_size=16"] opts = _merge_opts(opts, options) - return _ffi_api.TargetCreate("opencl", *opts) + return Target(" ".join(["opencl"] + opts)) def arm_cpu(model='unknown', options=None): @@ -204,7 +252,7 @@ def arm_cpu(model='unknown', options=None): opts = ["-device=arm_cpu"] + pre_defined_opt opts = _merge_opts(opts, options) - return _ffi_api.TargetCreate("llvm", *opts) + return Target(" ".join(["llvm"] + opts)) def rasp(options=None): @@ -223,8 +271,7 @@ def rasp(options=None): def vta(model='unknown', options=None): opts = ["-device=vta", '-keys=vta,cpu', '-model=%s' % model] opts = _merge_opts(opts, options) - ret = _ffi_api.TargetCreate("ext_dev", *opts) - return ret + return Target(" ".join(["ext_dev"] + opts)) def bifrost(model='unknown', options=None): @@ -237,7 +284,7 @@ def bifrost(model='unknown', options=None): """ opts = ["-device=bifrost", '-model=%s' % model] opts = _merge_opts(opts, options) - return _ffi_api.TargetCreate("opencl", *opts) + return Target(" ".join(["opencl"] + opts)) def hexagon(cpu_ver='v66', sim_args=None, llvm_args=None, hvx=128): @@ -348,66 +395,26 @@ def create_llvm(llvm_args): llvm_str = create_llvm(llvm_args) args_list = target_str.split() + llvm_str.split() - return _ffi_api.TargetCreate('hexagon', *args_list) + return Target(" ".join(["hexagon"] + args_list)) def create(target): - """Get a target given target string. + """Deprecated. Use the constructor of :py:mod:`tvm.target.Target` directly. + """ + warnings.warn( + 'tvm.target.create() is being deprecated. Please use tvm.target.Target() instead') + return Target(target) - Parameters - ---------- - target : str or dict - Can be one of a literal target string, a json string describing - a configuration, or a dictionary of configuration options. - When using a dictionary or json string to configure target, the - possible values are: - - kind : str (required) - Which codegen path to use, for example 'llvm' or 'cuda'. - keys : List of str (optional) - A set of strategies that can be dispatched to. When using - "kind=opencl" for example, one could set keys to ["mali", "opencl", "gpu"]. - device : str (optional) - A single key that corresponds to the actual device being run on. - This will be effectively appended to the keys. - libs : List of str (optional) - The set of external libraries to use. For example ['cblas', 'mkl']. - system-lib : bool (optional) - If True, build a module that contains self registered functions. 
- Useful for environments where dynamic loading like dlopen is banned. - mcpu : str (optional) - The specific cpu being run on. Serves only as an annotation. - model : str (optional) - An annotation indicating what model a workload came from. - runtime : str (optional) - An annotation indicating which runtime to use with a workload. - mtriple : str (optional) - The llvm triplet describing the target, for example "arm64-linux-android". - mattr : List of str (optional) - The llvm features to compile with, for example ["+avx512f", "+mmx"]. - mfloat-abi : str (optional) - An llvm setting that is one of 'hard' or 'soft' indicating whether to use - hardware or software floating-point operations. - - Returns - ------- - target : Target - The target object - Note - ---- - See the note on :py:mod:`tvm.target` on target string format. - """ - if isinstance(target, Target): - return target - if isinstance(target, dict): - return _ffi_api.TargetFromConfig(target) - if isinstance(target, str): - # Check if target is a valid json string by trying to load it. - # If we cant, then assume it is a non-json target string. - try: - return _ffi_api.TargetFromConfig(json.loads(target)) - except json.decoder.JSONDecodeError: - return _ffi_api.TargetFromString(target) - - raise ValueError("target has to be a string or dictionary.") +@_register_func("target._load_config_dict") +def _load_config_dict(config_dict_str): + try: + config = json.loads(config_dict_str) + except json.decoder.JSONDecodeError: + return None + if not isinstance(config, dict): + return None + for key in config.keys(): + if not isinstance(key, str): + return None + return config diff --git a/python/tvm/te/hybrid/calls.py b/python/tvm/te/hybrid/calls.py index 88ade6e49294..2c2f2bf435b8 100644 --- a/python/tvm/te/hybrid/calls.py +++ b/python/tvm/te/hybrid/calls.py @@ -20,7 +20,7 @@ from tvm.runtime import const, convert import tvm.te from tvm.ir.container import Array -from tvm import target as _tgt +from tvm.target import Target from tvm.tir import expr as _expr from tvm.tir import call_intrin from tvm.tir.stmt import For @@ -155,8 +155,8 @@ def max_num_threads(func_id, args): _internal_assert(func_id == "max_num_threads", "This function cannot be directly invoked!") _internal_assert(args.__len__() <= 1, "At most one argument accepted!") if args.__len__() == 0: - res = _tgt.Target.current().max_num_threads + res = Target.current().max_num_threads else: _internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint") - res = _tgt.Target.current(args[0].value).max_num_threads + res = Target.current(args[0].value).max_num_threads return convert(res) diff --git a/python/tvm/te/hybrid/runtime.py b/python/tvm/te/hybrid/runtime.py index 7987e46a4768..0f5a2b4dc6c7 100644 --- a/python/tvm/te/hybrid/runtime.py +++ b/python/tvm/te/hybrid/runtime.py @@ -17,11 +17,12 @@ """Intrinsics of TVM-Python Hybrid Script for Python emulation runtime""" import numpy -from tvm import target +from tvm.target import Target -class bind(object): #pylint: disable=invalid-name +class bind(object): # pylint: disable=invalid-name """GPU bind software emulataion runtime.""" + def __init__(self, _, ext): self.ext = ext @@ -32,7 +33,7 @@ def __iter__(self): i += 1 -def allocate(shape, dtype='float32', scope='global'): #pylint: disable=unused-argument +def allocate(shape, dtype='float32', scope='global'): # pylint: disable=unused-argument """Allocate a buffer with given shape Parameters @@ -107,39 +108,39 @@ def sigmoid(x): def max_num_threads(allow_none=True): 
"""Get max number of threads for GPU targets.""" - return target.Target.current(allow_none).max_num_threads + return Target.current(allow_none).max_num_threads HYBRID_GLOBALS = { - 'unroll' : range, - 'vectorize' : range, - 'parallel' : range, - 'const_range' : range, - 'bind' : bind, - 'allocate' : allocate, - 'output_tensor' : allocate, - 'sqrt' : numpy.sqrt, - 'rsqrt' : rsqrt, - 'log' : numpy.log, - 'tanh' : numpy.tanh, - 'power' : numpy.power, - 'exp' : numpy.exp, - 'sigmoid' : sigmoid, - 'popcount' : popcount, - 'round' : round, - 'likely' : lambda cond: cond, - 'uint8' : numpy.uint8, - 'uint16' : numpy.uint16, - 'uint32' : numpy.uint32, - 'uint64' : numpy.uint64, - 'int8' : numpy.int8, - 'int16' : numpy.int16, - 'int32' : numpy.int32, - 'int64' : numpy.int64, - 'float16' : numpy.float16, - 'float32' : numpy.float32, - 'float64' : numpy.float64, - 'ceil_div' : lambda a, b: (a + b - 1) // b, + 'unroll': range, + 'vectorize': range, + 'parallel': range, + 'const_range': range, + 'bind': bind, + 'allocate': allocate, + 'output_tensor': allocate, + 'sqrt': numpy.sqrt, + 'rsqrt': rsqrt, + 'log': numpy.log, + 'tanh': numpy.tanh, + 'power': numpy.power, + 'exp': numpy.exp, + 'sigmoid': sigmoid, + 'popcount': popcount, + 'round': round, + 'likely': lambda cond: cond, + 'uint8': numpy.uint8, + 'uint16': numpy.uint16, + 'uint32': numpy.uint32, + 'uint64': numpy.uint64, + 'int8': numpy.int8, + 'int16': numpy.int16, + 'int32': numpy.int32, + 'int64': numpy.int64, + 'float16': numpy.float16, + 'float32': numpy.float32, + 'float64': numpy.float64, + 'ceil_div': lambda a, b: (a + b - 1) // b, 'max_num_threads': max_num_threads } diff --git a/python/tvm/topi/cuda/conv2d_hwnc_tensorcore.py b/python/tvm/topi/cuda/conv2d_hwnc_tensorcore.py index 592613ffcf92..b7dad79642b0 100644 --- a/python/tvm/topi/cuda/conv2d_hwnc_tensorcore.py +++ b/python/tvm/topi/cuda/conv2d_hwnc_tensorcore.py @@ -20,6 +20,7 @@ import tvm from tvm import te from tvm import autotvm +from tvm.target import Target from tvm.topi.cuda.injective import schedule_injective_from_existing from ..util import get_const_tuple, traverse_inline, simplify, tag from ..nn.pad import pad @@ -158,7 +159,8 @@ def hwnc_tensorcore_cuda(cfg, Input, Filter, stride, padding, dilation, out_dtyp packed_kernel = Filter else: packed_kernel = te.compute(kernel_shape, lambda kh, kw, o, i, oo, ii: - Filter[kh, kw, o * wmma_n + oo, i * wmma_k + ii], + Filter[kh, kw, o * wmma_n + + oo, i * wmma_k + ii], name="packed_kernel" ) @@ -218,7 +220,7 @@ def schedule_hwnc_tensorcore_cuda(cfg, s, Conv): s[packed_kernel].pragma( s[packed_kernel].op.axis[0], "debug_skip_region") else: - with tvm.target.create('cuda'): + with Target('cuda'): schedule_injective_from_existing(s, packed_kernel) if isinstance(pad_data.op, te.tensor.ComputeOp) and "pad" in pad_data.op.tag: @@ -260,7 +262,7 @@ def schedule_hwnc_tensorcore_cuda(cfg, s, Conv): if not fuse_pack: s[packed_data].compute_inline() else: - with tvm.target.create('cuda'): + with Target('cuda'): schedule_injective_from_existing(s, packed_data) if data_dtype in ['int4', 'uint4']: diff --git a/python/tvm/topi/cuda/softmax.py b/python/tvm/topi/cuda/softmax.py index ef976514ccde..dbd032533c57 100644 --- a/python/tvm/topi/cuda/softmax.py +++ b/python/tvm/topi/cuda/softmax.py @@ -16,12 +16,13 @@ # under the License. 
# pylint: disable=invalid-name, unused-variable, trailing-whitespace """Schedule for softmax operator""" -from tvm import target as target_ +from tvm.target import Target from tvm import te from tvm.contrib import cudnn from .. import generic from .injective import schedule_injective_from_existing + def schedule_softmax(outs): """Schedule for softmax op. @@ -39,7 +40,7 @@ def schedule_softmax(outs): outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs s = te.create_schedule([x.op for x in outs]) softmax = outs[0] - tgt = target_.Target.current(allow_none=False) + tgt = Target.current(allow_none=False) op_tag = softmax.op.tag if op_tag == 'softmax_output': diff --git a/python/tvm/topi/generic/__init__.py b/python/tvm/topi/generic/__init__.py index 6171317cd80f..cc64abab8ed8 100644 --- a/python/tvm/topi/generic/__init__.py +++ b/python/tvm/topi/generic/__init__.py @@ -27,7 +27,7 @@ .. code-block:: python # create schedule that dispatches to topi.cuda.schedule_injective - with tvm.target.create("cuda"): + with tvm.target.Target("cuda"): s = tvm.tir.generic.schedule_injective(outs) """ from __future__ import absolute_import as _abs diff --git a/python/tvm/topi/testing/common.py b/python/tvm/topi/testing/common.py index 721493e81b43..3a7c60522dbf 100644 --- a/python/tvm/topi/testing/common.py +++ b/python/tvm/topi/testing/common.py @@ -37,7 +37,7 @@ def dispatch(target, dispatch_map): if isinstance(target, str): - target = tvm.target.create(target) + target = tvm.target.Target(target) assert isinstance(target, tvm.target.Target) for key in target.keys: if key in dispatch_map: diff --git a/rust/tvm/examples/resnet/src/build_resnet.py b/rust/tvm/examples/resnet/src/build_resnet.py index 1142f9970385..4dc1a2c26189 100644 --- a/rust/tvm/examples/resnet/src/build_resnet.py +++ b/rust/tvm/examples/resnet/src/build_resnet.py @@ -48,7 +48,7 @@ build_dir = args.build_dir batch_size = args.batch_size opt_level = args.opt_level -target = tvm.target.create(args.target) +target = tvm.target.Target(args.target) image_shape = tuple(map(int, args.image_shape.split(","))) data_shape = (batch_size,) + image_shape diff --git a/src/auto_scheduler/measure_record.cc b/src/auto_scheduler/measure_record.cc index 02f244f93de5..99c01b17e78e 100755 --- a/src/auto_scheduler/measure_record.cc +++ b/src/auto_scheduler/measure_record.cc @@ -127,7 +127,7 @@ struct Handler<::tvm::auto_scheduler::SearchTaskNode> { s = reader->NextArrayItem(); CHECK(s); reader->Read(&str_value); - data->target = ::tvm::Target::Create(str_value); + data->target = ::tvm::Target(str_value); s = reader->NextArrayItem(); CHECK(!s); } diff --git a/src/driver/driver_api.cc b/src/driver/driver_api.cc index 14aa4fc56e2e..e4851b5f0302 100644 --- a/src/driver/driver_api.cc +++ b/src/driver/driver_api.cc @@ -60,9 +60,9 @@ Target DefaultTargetHost(Target target) { return target; } else { if (LLVMEnabled()) { - return target::llvm(); + return Target("llvm"); } else { - return target::stackvm(); + return Target("stackvm"); } } } @@ -294,10 +294,10 @@ runtime::Module build(const Map& inputs, const Target& target_ runtime::Module build(const Map& inputs, const Target& target_host) { Map updated_input; for (const auto& it : inputs) { - auto target = Target::Create(it.first); + auto target = Target(it.first); Optional device = target->GetAttr("device"); if (device.defined() && device.value() == "vta") { - target = Target::Create("ext_dev"); + target = Target("ext_dev"); } updated_input.Set(target, it.second); } diff --git 
a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 533619ec8a19..21fd5915a806 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -339,9 +339,9 @@ class RelayBuildModule : public runtime::ModuleNode { */ Target CreateDefaultTarget(int device_type) { std::string name = runtime::DeviceName(device_type); - if (name == "cpu") return Target::Create("llvm"); - if (name == "gpu") return Target::Create("cuda"); - return Target::Create(name); + if (name == "cpu") return Target("llvm"); + if (name == "gpu") return Target("cuda"); + return Target(name); } /*! @@ -443,7 +443,7 @@ class RelayBuildModule : public runtime::ModuleNode { // llvm if "codegen.LLVMModuleCreate" is accessible. const runtime::PackedFunc* pf = runtime::Registry::Get("codegen.LLVMModuleCreate"); if (!target_host.defined()) - target_host = (pf != nullptr) ? target::llvm() : target::stackvm(); + target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); if (target_host.defined() && target_host->kind->name == "llvm") { // If we can decide the target is LLVM, we then create an empty LLVM module. diff --git a/src/relay/backend/compile_engine.cc b/src/relay/backend/compile_engine.cc index a083c3b83b12..3f7af37426b3 100644 --- a/src/relay/backend/compile_engine.cc +++ b/src/relay/backend/compile_engine.cc @@ -663,7 +663,7 @@ class CompileEngineImpl : public CompileEngineNode { const auto name_node = key->source_func->GetAttr(tvm::attr::kGlobalSymbol); CHECK(name_node.defined()) << "External function has not been attached a name yet."; cache_node->func_name = std::string(name_node.value()); - cache_node->target = tvm::target::ext_dev(); + cache_node->target = Target("ext_dev"); cache_node->funcs->Add(GlobalVar(cache_node->func_name), key->source_func); value->cached_func = CachedFunc(cache_node); return value; diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index b5024d55633c..959a7306668f 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -364,7 +364,7 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslatorGetAttr(attr::kCompiler).defined()) { - target = tvm::target::ext_dev(); + target = Target("ext_dev"); CCacheKey key = (*pf0)(func, target); CachedFunc ext_func = (*pf1)(compile_engine_, key); CHECK(ext_func.defined()) << "External function is not defined."; diff --git a/src/relay/backend/interpreter.cc b/src/relay/backend/interpreter.cc index 08c5a7cce353..2afaa86a32ac 100644 --- a/src/relay/backend/interpreter.cc +++ b/src/relay/backend/interpreter.cc @@ -284,7 +284,7 @@ class Interpreter : public ExprFunctor, } Array ComputeDynamicShape(const Function& func, const Array& args) { - CCacheKey key(func, Target::Create("llvm")); + CCacheKey key(func, Target("llvm")); auto cfunc = engine_->LowerShapeFunc(key); size_t arity = cfunc->inputs.size() + cfunc->outputs.size(); diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc index 18b23c42c6ea..aeb0c5aa55b2 100644 --- a/src/relay/backend/vm/compiler.cc +++ b/src/relay/backend/vm/compiler.cc @@ -237,9 +237,9 @@ std::vector ToAllocTensorShape(NDArray shape) { */ Target CreateDefaultTarget(int device_type) { std::string name = runtime::DeviceName(device_type); - if (name == "cpu") return Target::Create("llvm"); - if (name == "gpu") return Target::Create("cuda"); - return Target::Create(name); + if (name == "cpu") return Target("llvm"); + if (name == "gpu") return 
Target("cuda"); + return Target(name); } int GetFallbackDevice() { @@ -522,7 +522,7 @@ class VMFunctionCompiler : ExprFunctor { Target target; if (func->GetAttr(attr::kCompiler).defined()) { - target = tvm::target::ext_dev(); + target = Target("ext_dev"); } else { // Next generate the invoke instruction. if (expr_device_map_.empty()) { diff --git a/src/relay/transforms/fold_constant.cc b/src/relay/transforms/fold_constant.cc index bdc613d85cdb..0a1f173cef96 100644 --- a/src/relay/transforms/fold_constant.cc +++ b/src/relay/transforms/fold_constant.cc @@ -222,7 +222,7 @@ class ConstantFolder : public ExprMutator { DLContext ctx; ctx.device_type = kDLCPU; ctx.device_id = 0; - Target target = Target::Create("llvm"); + Target target = Target("llvm"); // use a fresh build context // in case we are already in a build context. // needed for both execution and creation(due to JIT) diff --git a/src/relay/transforms/partial_eval.cc b/src/relay/transforms/partial_eval.cc index e07dbea59bd1..afe2bd5d9302 100644 --- a/src/relay/transforms/partial_eval.cc +++ b/src/relay/transforms/partial_eval.cc @@ -536,7 +536,7 @@ DLContext CPUContext() { FInterpreter CPUInterpreter() { using tvm::transform::PassContext; - Target target = Target::Create("llvm"); + Target target = Target("llvm"); // use a fresh build context // in case we are already in a build context. With fresh_build_ctx(PassContext::Create()); diff --git a/src/target/build_common.h b/src/target/build_common.h index 531bd629bbdb..9d92697aa319 100644 --- a/src/target/build_common.h +++ b/src/target/build_common.h @@ -63,22 +63,6 @@ inline std::unordered_map ExtractFuncInfo(co return fmap; } -inline void UpdateTargetConfigKeyValueEntry(const String& key, const String& value, - Map* target_config, - bool error_if_inconsistent) { - if (target_config->count(key)) { - const ObjectRef& obj = (*target_config)[key]; - CHECK(obj->IsInstance()) << "TypeError: Expect target key \"" << key - << "\" to be String, but gets type: " << obj->GetTypeKey(); - if (error_if_inconsistent) { - String old_value = Downcast(obj); - CHECK_EQ(old_value, value) << "ValueError: Target key \"" << key << "\" has been set to \"" - << old_value << "\", and cannot be reset to \"" << value << "\""; - } - } - target_config->Set(key, value); -} - } // namespace codegen } // namespace tvm #endif // TVM_TARGET_BUILD_COMMON_H_ diff --git a/src/target/llvm/codegen_amdgpu.cc b/src/target/llvm/codegen_amdgpu.cc index c19c01b3acdf..205a8a44c1ea 100644 --- a/src/target/llvm/codegen_amdgpu.cc +++ b/src/target/llvm/codegen_amdgpu.cc @@ -191,75 +191,13 @@ class CodeGenAMDGPU : public CodeGenLLVM { } }; -inline int DetectROCMComputeVersion(const Target& target) { - if (const Optional mcpu = target->GetAttr("mcpu")) { - std::string gfx = mcpu.value(); - if (gfx.length() >= 3 && gfx.substr(0, 3) == "gfx") { - int version; - std::stringstream is(gfx.substr(3)); - if (is >> version) { - return version; - } - } - LOG(FATAL) << "ValueError: Unrecognized -mcpu value: " << mcpu; - } - TVMContext tvm_ctx; - tvm_ctx.device_type = kDLROCM; - tvm_ctx.device_id = 0; - tvm::runtime::DeviceAPI* api = tvm::runtime::DeviceAPI::Get(tvm_ctx, true); - if (api != nullptr) { - TVMRetValue val; - api->GetAttr(tvm_ctx, tvm::runtime::kExist, &val); - if (val.operator int() == 1) { - tvm::runtime::DeviceAPI::Get(tvm_ctx)->GetAttr(tvm_ctx, tvm::runtime::kGcnArch, &val); - return val.operator int(); - } - } - LOG(WARNING) << "Cannot find -mcpu to specify rocm compute version assume gfx900"; - return 900; -} - -inline int 
DetectROCMApiVersion() { - TVMContext tvm_ctx; - tvm_ctx.device_type = kDLROCM; - tvm_ctx.device_id = 0; - tvm::runtime::DeviceAPI* api = tvm::runtime::DeviceAPI::Get(tvm_ctx, true); - if (api != nullptr) { - TVMRetValue val; - api->GetAttr(tvm_ctx, tvm::runtime::kApiVersion, &val); - return val.operator int(); - } - LOG(WARNING) << "Cannot detect ROCm version, assume >= 3.5"; - return 305; -} - -Target UpdateTarget(const Target& original_target) { - Map target_config = original_target->Export(); - UpdateTargetConfigKeyValueEntry("mtriple", "amdgcn-amd-amdhsa-hcc", &target_config, true); - UpdateTargetConfigKeyValueEntry("mcpu", - "gfx" + std::to_string(DetectROCMComputeVersion(original_target)), - &target_config, false); - if (DetectROCMApiVersion() < 305) { - // before ROCm 3.5 we needed code object v2, starting - // with 3.5 we need v3 (this argument disables v3) - Array mattr; - if (target_config.count("mattr")) { - mattr = Downcast>(target_config["mattr"]); - } - mattr.push_back("-code-object-v3"); - target_config.Set("mattr", mattr); - } - return Target::FromConfig(target_config); -} - -runtime::Module BuildAMDGPU(IRModule mod, Target original_target) { +runtime::Module BuildAMDGPU(IRModule mod, Target target) { #if TVM_LLVM_VERSION < 90 LOG(FATAL) << "AMDGPU backend requires at least LLVM 9"; // Lower versions will crash when loading the bitcode, see // issue #4087 for a discussion #endif InitializeLLVM(); - Target target = UpdateTarget(original_target); std::unique_ptr tm = GetLLVMTargetMachine(target); std::unique_ptr ctx(new llvm::LLVMContext()); // careful: cg will hold a naked pointer reference to ctx, so it should diff --git a/src/target/llvm/codegen_blob.cc b/src/target/llvm/codegen_blob.cc index 5d8a7697e0e7..edf744d8b2cb 100644 --- a/src/target/llvm/codegen_blob.cc +++ b/src/target/llvm/codegen_blob.cc @@ -34,7 +34,7 @@ namespace codegen { std::pair, std::shared_ptr> CodeGenBlob( const std::string& data, bool system_lib, const std::string& target_triple) { InitializeLLVM(); - Target target = Target::Create("llvm -mtriple " + target_triple); + Target target = Target("llvm -mtriple " + target_triple); auto tm = GetLLVMTargetMachine(target); auto triple = tm->getTargetTriple(); auto ctx = std::make_shared(); diff --git a/src/target/llvm/codegen_nvptx.cc b/src/target/llvm/codegen_nvptx.cc index fe409ba0a0cd..e9999f1ca283 100644 --- a/src/target/llvm/codegen_nvptx.cc +++ b/src/target/llvm/codegen_nvptx.cc @@ -236,36 +236,16 @@ llvm::Value* CodeGenNVPTX::CreateIntrinsic(const CallNode* op) { return CodeGenLLVM::CreateIntrinsic(op); } -inline int DetectCUDAComputeVersion() { - TVMContext tvm_ctx; - tvm_ctx.device_type = kDLGPU; - tvm_ctx.device_id = 0; - TVMRetValue val; - tvm::runtime::DeviceAPI::Get(tvm_ctx)->GetAttr(tvm_ctx, tvm::runtime::kExist, &val); - if (val.operator int() == 1) { - tvm::runtime::DeviceAPI::Get(tvm_ctx)->GetAttr(tvm_ctx, tvm::runtime::kComputeVersion, &val); - std::string version = val; - std::istringstream is(version); - double ver; - is >> ver; - return static_cast(ver * 10); - } else { - return 20; - } -} - -Target UpdateTarget(const Target& original_target, int compute_ver) { - Map target_config = original_target->Export(); - UpdateTargetConfigKeyValueEntry("mtriple", "nvptx64-nvidia-cuda", &target_config, true); - UpdateTargetConfigKeyValueEntry("mcpu", "sm_" + std::to_string(compute_ver), &target_config, - false); - return Target::FromConfig(target_config); +int GetCUDAComputeVersion(const Target& target) { + Optional mcpu = target->GetAttr("mcpu"); 
+ CHECK(mcpu.defined()) << "InternalError: \"-mcpu\" is undefined in the NVPTX target"; + std::string sm_version = mcpu.value(); + return std::stoi(sm_version.substr(3)); } -runtime::Module BuildNVPTX(IRModule mod, Target original_target) { +runtime::Module BuildNVPTX(IRModule mod, Target target) { InitializeLLVM(); - int compute_ver = DetectCUDAComputeVersion(); - Target target = UpdateTarget(original_target, compute_ver); + int compute_ver = GetCUDAComputeVersion(target); std::unique_ptr tm = GetLLVMTargetMachine(target); std::unique_ptr ctx(new llvm::LLVMContext()); // careful: cg will hold a naked pointer reference to ctx, so it should diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index b3d448aee77f..712980cdbe41 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -265,7 +265,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { target_metadata = os.str(); } mptr_ = module_.get(); - tm_ = GetLLVMTargetMachine(Target::Create(target_metadata)); + tm_ = GetLLVMTargetMachine(Target(target_metadata)); } void LoadIR(const std::string& file_name) { @@ -287,7 +287,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { return; } if (!target_.defined()) { - target_ = Target::Create("llvm"); + target_ = Target("llvm"); } llvm::EngineBuilder builder(std::move(module_)); std::string triple, mcpu, mattr; @@ -304,7 +304,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { } builder.setTargetOptions(opt); auto tm = std::unique_ptr(builder.selectTarget()); - std::unique_ptr tm_sys = GetLLVMTargetMachine(Target::Create("llvm")); + std::unique_ptr tm_sys = GetLLVMTargetMachine(Target("llvm")); if (tm_sys->getTargetTriple().getArch() != tm->getTargetTriple().getArch()) { LOG(FATAL) << "Cannot run module, architecture mismatch " << " module=" << tm->getTargetTriple().str() @@ -369,7 +369,7 @@ TVM_REGISTER_GLOBAL("target.build.llvm") TVM_REGISTER_GLOBAL("codegen.LLVMModuleCreate") .set_body_typed([](std::string target_str, std::string module_name) -> runtime::Module { - Target target = Target::Create(target_str); + Target target = Target(target_str); auto n = make_object(); // Generate a LLVM module from an input target string InitializeLLVM(); @@ -403,7 +403,7 @@ TVM_REGISTER_GLOBAL("runtime.module.loadfile_ll") TVM_REGISTER_GLOBAL("codegen.llvm_target_enabled") .set_body_typed([](std::string target_str) -> bool { InitializeLLVM(); - Target target = Target::Create(target_str); + Target target = Target(target_str); return (GetLLVMTargetMachine(target, true) != nullptr); }); diff --git a/src/target/tag.cc b/src/target/tag.cc new file mode 100644 index 000000000000..3e47e456691a --- /dev/null +++ b/src/target/tag.cc @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/target/target_tag.cc + * \brief Target tag registry + */ +#include +#include +#include + +#include "../node/attr_registry.h" + +namespace tvm { + +TVM_REGISTER_NODE_TYPE(TargetTagNode); + +TVM_REGISTER_GLOBAL("target.TargetTagListTags").set_body_typed(TargetTag::ListTags); +TVM_REGISTER_GLOBAL("target.TargetTagAddTag").set_body_typed(TargetTag::AddTag); + +/********** Registry-related code **********/ + +using TargetTagRegistry = AttrRegistry; + +TargetTagRegEntry& TargetTagRegEntry::RegisterOrGet(const String& target_tag_name) { + return TargetTagRegistry::Global()->RegisterOrGet(target_tag_name); +} + +Optional TargetTag::Get(const String& target_tag_name) { + const TargetTagRegEntry* reg = TargetTagRegistry::Global()->Get(target_tag_name); + if (reg == nullptr) { + return NullOpt; + } + return Target(reg->tag_->config); +} + +Map TargetTag::ListTags() { + Map result; + for (const String& tag : TargetTagRegistry::Global()->ListAllNames()) { + result.Set(tag, TargetTag::Get(tag).value()); + } + return result; +} + +Target TargetTag::AddTag(String name, Map config, bool override) { + TargetTagRegEntry& tag = TargetTagRegEntry::RegisterOrGet(name).set_name(); + CHECK(override || tag.tag_->config.empty()) + << "Tag \"" << name << "\" has been previously defined as: " << tag.tag_->config; + tag.set_config(config); + return Target(config); +} + +/********** Register Target tags **********/ + +TVM_REGISTER_TARGET_TAG("nvidia/rtx2080ti") + .set_config({ + {"kind", String("cuda")}, + {"arch", String("sm_75")}, + }); + +} // namespace tvm diff --git a/src/target/target.cc b/src/target/target.cc index ccc0023378df..052824249392 100644 --- a/src/target/target.cc +++ b/src/target/target.cc @@ -21,8 +21,8 @@ * \file src/target/target.cc */ #include -#include #include +#include #include #include #include @@ -34,12 +34,27 @@ namespace tvm { -using runtime::PackedFunc; -using runtime::TVMArgs; -using runtime::TVMRetValue; - TVM_REGISTER_NODE_TYPE(TargetNode); +class TargetInternal { + public: + static void EnterScope(Target target) { target.EnterWithScope(); } + static void ExitScope(Target target) { target.ExitWithScope(); } + static Map Export(Target target) { return target->Export(); } + static const TargetKindNode::ValueTypeInfo& FindTypeInfo(const TargetKind& kind, + const std::string& key); + static Optional StringifyAttrsToRaw(const Map& attrs); + static ObjectRef ParseType(const std::string& str, const TargetKindNode::ValueTypeInfo& info); + static ObjectRef ParseType(const ObjectRef& obj, const TargetKindNode::ValueTypeInfo& info); + static ObjectPtr FromString(const String& tag_or_config_or_target_str); + static ObjectPtr FromConfigString(const String& config_str); + static ObjectPtr FromRawString(const String& target_str); + static ObjectPtr FromConfig(std::unordered_map config); + static void ConstructorDispatcher(TVMArgs args, TVMRetValue* rv); +}; + +/********** Helper functions **********/ + static std::vector DeduplicateKeys(const std::vector& keys) { std::vector new_keys; for (size_t i = 0; i < keys.size(); ++i) { @@ -57,119 +72,216 @@ static std::vector DeduplicateKeys(const std::vector& keys) { return new_keys; } -static inline std::string RemovePrefixDashes(const std::string& s) { - size_t n_dashes = 0; - for (; n_dashes < s.length() && s[n_dashes] == '-'; ++n_dashes) { +template +static const TObj* ObjTypeCheck(const ObjectRef& obj, const std::string& 
expected_type) { + const TObj* ptr = obj.as(); + if (ptr == nullptr) { + std::ostringstream os; + os << ": Expects type \"" << expected_type << "\", but gets \"" << obj->GetTypeKey() + << "\" for object: " << obj; + throw dmlc::Error(os.str()); + } + return ptr; +} + +static TargetKind GetTargetKind(const String& name) { + Optional kind = TargetKind::Get(name); + if (!kind.defined()) { + throw dmlc::Error(": Target kind \"" + name + "\" is not defined"); + } + return kind.value(); +} + +static std::string RemovePrefixDashes(const std::string& s) { + int n_dashes = 0; + int len = s.length(); + for (; n_dashes < len && s[n_dashes] == '-'; ++n_dashes) { + } + if (n_dashes == 0) { + throw dmlc::Error(": Attribute keys should start with '-', not an attribute key: " + s); + } + if (n_dashes >= len) { + throw dmlc::Error(": Not an attribute key: " + s); } - CHECK(0 < n_dashes && n_dashes < s.size()) << "ValueError: Not an attribute key \"" << s << "\""; return s.substr(n_dashes); } -static inline int FindUniqueSubstr(const std::string& str, const std::string& substr) { +static int FindFirstSubstr(const std::string& str, const std::string& substr) { size_t pos = str.find_first_of(substr); - if (pos == std::string::npos) { - return -1; - } - size_t next_pos = pos + substr.size(); - CHECK(next_pos >= str.size() || str.find_first_of(substr, next_pos) == std::string::npos) - << "ValueError: At most one \"" << substr << "\" is allowed in " - << "the the given string \"" << str << "\""; - return pos; + return pos == std::string::npos ? -1 : pos; +} + +static Optional JoinString(const std::vector& array, char separator) { + if (array.empty()) { + return NullOpt; + } + std::ostringstream os; + os << array[0]; + for (size_t i = 1; i < array.size(); ++i) { + os << separator << array[i]; + } + return String(os.str()); +} + +static int ParseKVPair(const std::string& s, const std::string& s_next, std::string* key, + std::string* value) { + int pos; + std::string& result_k = *key; + std::string& result_v = *value; + if ((pos = FindFirstSubstr(s, "=")) != -1) { + // case 1. --key=value + result_k = s.substr(0, pos); + result_v = s.substr(pos + 1); + if (result_k.empty() || result_v.empty()) { + throw dmlc::Error(": Empty attribute key or value in \"" + s + "\""); + } + return 1; + } else if (!s_next.empty() && s_next[0] != '-') { + // case 2. --key value + result_k = s; + result_v = s_next; + return 2; + } + // case 3. --boolean-key + result_k = s; + result_v = "1"; + return 1; +} + +const TargetKindNode::ValueTypeInfo& TargetInternal::FindTypeInfo(const TargetKind& kind, + const std::string& key) { + auto it = kind->key2vtype_.find(key); + if (it == kind->key2vtype_.end()) { + std::ostringstream os; + os << ": Cannot recognize \'" << key << "\'. Candidates are: "; + bool is_first = true; + for (const auto& kv : kind->key2vtype_) { + if (is_first) { + is_first = false; + } else { + os << ", "; + } + os << kv.first; + } + throw dmlc::Error(os.str()); + } + return it->second; } -static inline ObjectRef ParseAtomicType(uint32_t type_index, const std::string& str) { +/********** Parsing **********/ + +ObjectRef TargetInternal::ParseType(const std::string& str, + const TargetKindNode::ValueTypeInfo& info) { std::istringstream is(str); - if (type_index == Integer::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + if (info.type_index == Integer::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing integer int v; - is >> v; - return is.fail() ? 
ObjectRef(nullptr) : Integer(v); - } else if (type_index == String::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + if (!(is >> v)) { + throw dmlc::Error(": Cannot parse into type \"Integer\" from string: " + str); + } + return Integer(v); + } else if (info.type_index == String::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing string std::string v; - is >> v; - return is.fail() ? ObjectRef(nullptr) : String(v); + if (!(is >> v)) { + throw dmlc::Error(": Cannot parse into type \"String\" from string: " + str); + } + return String(v); + } else if (info.type_index == Target::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing target + return Target(TargetInternal::FromString(str)); + } else if (info.type_index == ArrayNode::_GetOrAllocRuntimeTypeIndex()) { + // Parsing array + std::vector result; + for (std::string substr; std::getline(is, substr, ',');) { + try { + ObjectRef parsed = TargetInternal::ParseType(substr, *info.key); + result.push_back(parsed); + } catch (const dmlc::Error& e) { + std::string index = "[" + std::to_string(result.size()) + "]"; + throw dmlc::Error(index + e.what()); + } + } + return Array(result); } - return ObjectRef(nullptr); + throw dmlc::Error(": Unsupported type \"" + info.type_key + "\" for parsing from string: " + str); } -Map TargetNode::ParseAttrsFromRaw( - const std::vector& options) const { - std::unordered_map attrs; - for (size_t iter = 0, end = options.size(); iter < end;) { - // remove the prefix dashes - std::string s = RemovePrefixDashes(options[iter++]); - // parse name-obj pair - std::string name; - std::string obj; - int pos; - if ((pos = FindUniqueSubstr(s, "=")) != -1) { - // case 1. --key=value - name = s.substr(0, pos); - obj = s.substr(pos + 1); - CHECK(!name.empty()) << "ValueError: Empty attribute key in \"" << options[iter - 1] << "\""; - CHECK(!obj.empty()) << "ValueError: Empty attribute in \"" << options[iter - 1] << "\""; - } else if (iter < end && options[iter][0] != '-') { - // case 2. --key value - name = s; - obj = options[iter++]; - } else { - // case 3. --boolean-key - name = s; - obj = "1"; +ObjectRef TargetInternal::ParseType(const ObjectRef& obj, + const TargetKindNode::ValueTypeInfo& info) { + if (info.type_index == Integer::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing integer + return GetRef(ObjTypeCheck(obj, "Integer")); + } else if (info.type_index == String::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing string + return GetRef(ObjTypeCheck(obj, "String")); + } else if (info.type_index == Target::ContainerType::_GetOrAllocRuntimeTypeIndex()) { + // Parsing target + if (const auto* ptr = obj.as()) { + return GetRef(ptr); + } else if (const auto* ptr = obj.as()) { + return Target(TargetInternal::FromString(GetRef(ptr))); + } else if (const auto* ptr = obj.as()) { + for (const auto& kv : *ptr) { + if (!kv.first->IsInstance()) { + throw dmlc::Error(": Target object requires key of dict to be str, but get: " + + kv.first->GetTypeKey()); + } + } + Map config = GetRef>(ptr); + return Target(TargetInternal::FromConfig({config.begin(), config.end()})); } - // check if `name` is invalid - auto it = this->kind->key2vtype_.find(name); - if (it == this->kind->key2vtype_.end()) { - std::ostringstream os; - os << "AttributeError: Invalid config option, cannot recognize \'" << name - << "\'. 
Candidates are:"; - for (const auto& kv : this->kind->key2vtype_) { - os << "\n " << kv.first; + throw dmlc::Error(": Expect type 'dict' or 'str' to construct Target, but get: " + + obj->GetTypeKey()); + } else if (info.type_index == ArrayNode::_GetOrAllocRuntimeTypeIndex()) { + // Parsing array + const auto* array = ObjTypeCheck(obj, "Array"); + std::vector result; + for (const ObjectRef& e : *array) { + try { + result.push_back(TargetInternal::ParseType(e, *info.key)); + } catch (const dmlc::Error& e) { + std::string index = '[' + std::to_string(result.size()) + ']'; + throw dmlc::Error(index + e.what()); } - LOG(FATAL) << os.str(); } - // check if `name` has been set once - CHECK(!attrs.count(name)) << "AttributeError: key \"" << name - << "\" appears more than once in the target string"; - // then `name` is valid, let's parse them - // only several types are supported when parsing raw string - const auto& info = it->second; - ObjectRef parsed_obj(nullptr); - if (info.type_index != ArrayNode::_type_index) { - parsed_obj = ParseAtomicType(info.type_index, obj); - } else { - Array array; - std::string item; - bool failed = false; - uint32_t type_index = info.key->type_index; - for (std::istringstream is(obj); std::getline(is, item, ',');) { - ObjectRef parsed_obj = ParseAtomicType(type_index, item); - if (parsed_obj.defined()) { - array.push_back(parsed_obj); - } else { - failed = true; - break; - } + return Array(result); + } else if (info.type_index == MapNode::_GetOrAllocRuntimeTypeIndex()) { + // Parsing map + const auto* map = ObjTypeCheck(obj, "Map"); + std::unordered_map result; + for (const auto& kv : *map) { + ObjectRef key, val; + try { + key = TargetInternal::ParseType(kv.first, *info.key); + } catch (const dmlc::Error& e) { + std::ostringstream os; + os << "'s key \"" << key << "\"" << e.what(); + throw dmlc::Error(os.str()); } - if (!failed) { - parsed_obj = std::move(array); + try { + val = TargetInternal::ParseType(kv.second, *info.val); + } catch (const dmlc::Error& e) { + std::ostringstream os; + os << "[\"" << key << "\"]" << e.what(); + throw dmlc::Error(os.str()); } + result[key] = val; } - if (!parsed_obj.defined()) { - LOG(FATAL) << "ValueError: Cannot parse type \"" << info.type_key << "\"" - << ", where attribute key is \"" << name << "\"" - << ", and attribute is \"" << obj << "\""; - } - attrs[name] = std::move(parsed_obj); + return Map(result); } - // set default attribute values if they do not exist - for (const auto& kv : this->kind->key2default_) { - if (!attrs.count(kv.first)) { - attrs[kv.first] = kv.second; - } + if (info.type_index != obj->type_index()) { + std::ostringstream os; + os << ": Parsing type \"" << info.type_key + << "\" is not supported for the given object of type \"" << obj->GetTypeKey() + << "\". 
The object is: " << obj; + throw dmlc::Error(os.str()); } - return attrs; + return obj; } +/********** Stringifying **********/ + static inline Optional StringifyAtomicType(const ObjectRef& obj) { if (const auto* p = obj.as()) { return String(std::to_string(p->value)); @@ -180,19 +292,7 @@ static inline Optional StringifyAtomicType(const ObjectRef& obj) { return NullOpt; } -static inline Optional JoinString(const std::vector& array, char separator) { - if (array.empty()) { - return NullOpt; - } - std::ostringstream os; - os << array[0]; - for (size_t i = 1; i < array.size(); ++i) { - os << separator << array[i]; - } - return String(os.str()); -} - -Optional TargetNode::StringifyAttrsToRaw(const Map& attrs) const { +Optional TargetInternal::StringifyAttrsToRaw(const Map& attrs) { std::ostringstream os; std::vector keys; for (const auto& kv : attrs) { @@ -225,35 +325,52 @@ Optional TargetNode::StringifyAttrsToRaw(const Map& a return JoinString(result, ' '); } -Target Target::CreateTarget(const std::string& name, const std::vector& options) { - TargetKind kind = TargetKind::Get(name); - ObjectPtr target = make_object(); - target->kind = kind; - // tag is always empty - target->tag = ""; - // parse attrs - target->attrs = target->ParseAttrsFromRaw(options); - String device_name = target->GetAttr("device", "").value(); - // set up keys - { - std::vector keys; - // user provided keys - if (Optional> user_keys = target->GetAttr>("keys")) { - keys = std::vector(user_keys.value().begin(), user_keys.value().end()); - target->attrs.erase("keys"); - } - // add `device_name` - if (!device_name.empty()) { - keys.push_back(device_name); +const std::string& TargetNode::str() const { + if (str_repr_.empty()) { + std::ostringstream os; + os << kind->name; + if (!this->keys.empty()) { + os << " -keys="; + bool is_first = true; + for (const String& s : keys) { + if (is_first) { + is_first = false; + } else { + os << ','; + } + os << s; + } } - // add default keys - for (const auto& key : target->kind->default_keys) { - keys.push_back(key); + if (Optional attrs_str = TargetInternal::StringifyAttrsToRaw(attrs)) { + os << ' ' << attrs_str.value(); } - // de-duplicate keys - target->keys = DeduplicateKeys(keys); + str_repr_ = os.str(); + } + return str_repr_; +} + +/********** Small member methods **********/ + +Target::Target(const String& tag_or_config_or_target_str) { + ObjectPtr target; + try { + target = TargetInternal::FromString(tag_or_config_or_target_str); + } catch (const dmlc::Error& e) { + LOG(FATAL) << "ValueError" << e.what() + << ". Target creation from string failed: " << tag_or_config_or_target_str; } - return Target(target); + data_ = std::move(target); +} + +Target::Target(const Map& config) { + ObjectPtr target; + try { + target = TargetInternal::FromConfig({config.begin(), config.end()}); + } catch (const dmlc::Error& e) { + LOG(FATAL) << "ValueError" << e.what() + << ". Target creation from config dict failed: " << config; + } + data_ = std::move(target); } std::vector TargetNode::GetKeys() const { @@ -288,151 +405,177 @@ Map TargetNode::Export() const { return result; } -const std::string& TargetNode::str() const { - if (str_repr_.empty()) { - std::ostringstream os; - os << kind->name; - if (!this->keys.empty()) { - os << " -keys="; - bool is_first = true; - for (const String& s : keys) { - if (is_first) { - is_first = false; - } else { - os << ','; - } - os << s; - } - } - if (Optional attrs_str = this->StringifyAttrsToRaw(attrs)) { - os << ' ' << attrs_str.value(); +/*! 
\brief Entry to hold the Target context stack. */ +struct TVMTargetThreadLocalEntry { + /*! \brief The current target context */ + std::stack context_stack; +}; + +/*! \brief Thread local store to hold the Target context stack. */ +using TVMTargetThreadLocalStore = dmlc::ThreadLocalStore; + +void Target::EnterWithScope() { + TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); + entry->context_stack.push(*this); +} + +void Target::ExitWithScope() { + TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); + CHECK(!entry->context_stack.empty()); + CHECK(entry->context_stack.top().same_as(*this)); + entry->context_stack.pop(); +} + +Target Target::Current(bool allow_not_defined) { + TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); + if (entry->context_stack.size() > 0) { + return entry->context_stack.top(); + } + CHECK(allow_not_defined) + << "Target context required. Please set it by constructing a TargetContext"; + + return Target(); +} + +/********** Creation **********/ + +void TargetInternal::ConstructorDispatcher(TVMArgs args, TVMRetValue* rv) { + if (args.num_args == 1) { + const auto& arg = args[0]; + if (arg.IsObjectRef()) { + *rv = Target(arg.AsObjectRef()); + } else if (String::CanConvertFrom(arg)) { + *rv = Target(arg.operator String()); + } else if (arg.IsObjectRef>()) { + *rv = Target(arg.operator Map()); + } else if (arg.type_code() == kTVMObjectHandle) { + ObjectRef obj = arg; + LOG(FATAL) << "TypeError: Cannot create target with type: " << obj->GetTypeKey(); + } else { + LOG(FATAL) << "TypeError: Cannot create target with type: " + << runtime::ArgTypeCode2Str(arg.type_code()); } - str_repr_ = os.str(); + return; } - return str_repr_; + LOG(FATAL) << "ValueError: Invalid number of arguments. Expect 1, but gets: " << args.num_args; } -bool StartsWith(const std::string& str, const std::string& pattern) { - return str.compare(0, pattern.length(), pattern) == 0; +ObjectPtr TargetInternal::FromString(const String& tag_or_config_or_target_str) { + if (Optional target = TargetTag::Get(tag_or_config_or_target_str)) { + Target value = target.value(); + return runtime::ObjectInternal::MoveObjectPtr(&value); + } + if (!tag_or_config_or_target_str.empty() && tag_or_config_or_target_str.data()[0] == '{') { + return TargetInternal::FromConfigString(tag_or_config_or_target_str); + } + return TargetInternal::FromRawString(tag_or_config_or_target_str); } -Target Target::Create(const String& target_str) { - std::vector splits; - std::istringstream is(target_str); - for (std::string s; is >> s; splits.push_back(s)) { +ObjectPtr TargetInternal::FromConfigString(const String& config_str) { + const auto* loader = tvm::runtime::Registry::Get("target._load_config_dict"); + CHECK(loader) << "AttributeError: \"target._load_config_dict\" is not registered. 
Please check " + "if the python module is properly loaded"; + Optional> config = (*loader)(config_str); + if (!config.defined()) { + throw dmlc::Error(": Cannot load config dict with python JSON loader"); } - CHECK(!splits.empty()) << "ValueError: Cannot parse empty target string: \"" << target_str - << "\""; - return CreateTarget(splits[0], {splits.begin() + 1, splits.end()}); + return TargetInternal::FromConfig({config.value().begin(), config.value().end()}); } -ObjectRef TargetNode::ParseAttr(const ObjectRef& obj, - const TargetKindNode::ValueTypeInfo& info) const { - if (info.type_index == Integer::ContainerType::_GetOrAllocRuntimeTypeIndex()) { - const auto* v = obj.as(); - CHECK(v != nullptr) << "Expect type 'int', but get: " << obj->GetTypeKey(); - return GetRef(v); - } - if (info.type_index == String::ContainerType::_GetOrAllocRuntimeTypeIndex()) { - const auto* v = obj.as(); - CHECK(v != nullptr) << "Expect type 'str', but get: " << obj->GetTypeKey(); - return GetRef(v); - } - if (info.type_index == Target::ContainerType::_GetOrAllocRuntimeTypeIndex()) { - CHECK(obj->IsInstance()) - << "Expect type 'dict' to construct Target, but get: " << obj->GetTypeKey(); - return Target::FromConfig(Downcast>(obj)); - } - if (info.type_index == ArrayNode::_GetOrAllocRuntimeTypeIndex()) { - CHECK(obj->IsInstance()) << "Expect type 'list', but get: " << obj->GetTypeKey(); - Array array = Downcast>(obj); - std::vector result; - int i = 0; - for (const ObjectRef& e : array) { - ++i; - try { - result.push_back(TargetNode::ParseAttr(e, *info.key)); - } catch (const dmlc::Error& e) { - LOG(FATAL) << "Error occurred when parsing element " << i << " of the array: " << array - << ". Details:\n" - << e.what(); - } +ObjectPtr TargetInternal::FromRawString(const String& target_str) { + // Split the string by empty spaces + std::string name; + std::vector options; + std::string str; + for (std::istringstream is(target_str); is >> str;) { + if (name.empty()) { + name = str; + } else { + options.push_back(str); } - return Array(result); } - if (info.type_index == MapNode::_GetOrAllocRuntimeTypeIndex()) { - CHECK(obj->IsInstance()) << "Expect type 'dict', but get: " << obj->GetTypeKey(); - std::unordered_map result; - for (const auto& kv : Downcast>(obj)) { - ObjectRef key, val; - try { - key = TargetNode::ParseAttr(kv.first, *info.key); - } catch (const tvm::Error& e) { - LOG(FATAL) << "Error occurred when parsing a key of the dict: " << kv.first - << ". Details:\n" - << e.what(); - } - try { - val = TargetNode::ParseAttr(kv.second, *info.val); - } catch (const tvm::Error& e) { - LOG(FATAL) << "Error occurred when parsing a value of the dict: " << kv.second - << ". Details:\n" - << e.what(); + if (name.empty()) { + throw dmlc::Error(": Cannot parse empty target string"); + } + // Create the target config + std::unordered_map config = {{"kind", String(name)}}; + TargetKind kind = GetTargetKind(name); + for (size_t iter = 0, end = options.size(); iter < end;) { + std::string key, value; + try { + // Parse key-value pair + std::string s_next = (iter + 1 < options.size()) ? 
options[iter + 1] : ""; + iter += ParseKVPair(RemovePrefixDashes(options[iter]), s_next, &key, &value); + } catch (const dmlc::Error& e) { + throw dmlc::Error(": Error when parsing target" + std::string(e.what())); + } + try { + // check if `key` has been used + if (config.count(key)) { + throw dmlc::Error(": The key \"" + key + "\" appears more than once"); } - result[key] = val; + config[key] = TargetInternal::ParseType(value, TargetInternal::FindTypeInfo(kind, key)); + } catch (const dmlc::Error& e) { + throw dmlc::Error(": Error when parsing target[\"" + key + "\"]" + e.what()); } - return Map(result); } - LOG(FATAL) << "Unsupported type registered: \"" << info.type_key - << "\", and the type given is: " << obj->GetTypeKey(); - throw; + return TargetInternal::FromConfig(config); } -Target Target::FromConfig(const Map& config_dict) { +ObjectPtr TargetInternal::FromConfig(std::unordered_map config) { const String kKind = "kind"; const String kTag = "tag"; const String kKeys = "keys"; const String kDeviceName = "device"; - std::unordered_map config(config_dict.begin(), config_dict.end()); ObjectPtr target = make_object(); // parse 'kind' if (config.count(kKind)) { - const auto* kind = config[kKind].as(); - CHECK(kind != nullptr) << "AttributeError: Expect type of field 'kind' is string, but get: " - << config[kKind]->GetTypeKey(); - target->kind = TargetKind::Get(GetRef(kind)); - config.erase(kKind); + if (const auto* kind = config[kKind].as()) { + target->kind = GetTargetKind(GetRef(kind)); + config.erase(kKind); + } else { + throw dmlc::Error(": Expect type of field \"kind\" is String, but get type: " + + config[kKind]->GetTypeKey()); + } } else { - LOG(FATAL) << "AttributeError: Field 'kind' is not found"; + throw dmlc::Error(": Field \"kind\" is not found"); } // parse "tag" if (config.count(kTag)) { - const auto* tag = config[kTag].as(); - CHECK(tag != nullptr) << "AttributeError: Expect type of field 'tag' is string, but get: " - << config[kTag]->GetTypeKey(); - target->tag = GetRef(tag); - config.erase(kTag); + if (const auto* tag = config[kTag].as()) { + target->tag = GetRef(tag); + config.erase(kTag); + } else { + throw dmlc::Error(": Expect type of field \"tag\" is String, but get type: " + + config[kTag]->GetTypeKey()); + } } else { target->tag = ""; } // parse "keys" - if (config.count(kKeys)) { + { std::vector keys; - // user provided keys - const auto* cfg_keys = config[kKeys].as(); - CHECK(cfg_keys != nullptr) - << "AttributeError: Expect type of field 'keys' is an Array, but get: " - << config[kKeys]->GetTypeKey(); - for (const ObjectRef& e : *cfg_keys) { - const auto* key = e.as(); - CHECK(key != nullptr) << "AttributeError: Expect 'keys' to be an array of strings, but it " - "contains an element of type: " - << e->GetTypeKey(); - keys.push_back(GetRef(key)); + if (config.count(kKeys)) { + // user provided keys + if (const auto* cfg_keys = config[kKeys].as()) { + for (const ObjectRef& e : *cfg_keys) { + if (const auto* key = e.as()) { + keys.push_back(GetRef(key)); + } else { + throw dmlc::Error( + ": Expect 'keys' to be an array of strings, but it " + "contains an element of type: " + + e->GetTypeKey()); + } + } + } else { + throw dmlc::Error(": Expect type of field \"keys\" is Array, but get type: " + + config[kKeys]->GetTypeKey()); + } } // add device name - if (config_dict.count(kDeviceName)) { - if (const auto* device = config_dict.at(kDeviceName).as()) { + if (config.count(kDeviceName)) { + if (const auto* device = config.at(kDeviceName).as()) { 
keys.push_back(GetRef(device)); } } @@ -443,32 +586,18 @@ Target Target::FromConfig(const Map& config_dict) { // de-duplicate keys target->keys = DeduplicateKeys(keys); config.erase(kKeys); - } else { - target->keys = {}; } // parse attrs std::unordered_map attrs; - const auto& key2vtype = target->kind->key2vtype_; for (const auto& cfg_kv : config) { - const String& name = cfg_kv.first; - const ObjectRef& obj = cfg_kv.second; - if (!key2vtype.count(name)) { - std::ostringstream os; - os << "AttributeError: Unrecognized config option: \"" << name << "\". Candidates are:"; - for (const auto& kv : key2vtype) { - os << " " << kv.first; - } - LOG(FATAL) << os.str(); - } - ObjectRef val; + const String& key = cfg_kv.first; + const ObjectRef& value = cfg_kv.second; try { - val = target->ParseAttr(obj, key2vtype.at(name)); + const TargetKindNode::ValueTypeInfo& info = TargetInternal::FindTypeInfo(target->kind, key); + attrs[key] = TargetInternal::ParseType(value, info); } catch (const dmlc::Error& e) { - LOG(FATAL) << "AttributeError: Error occurred in parsing the config key \"" << name - << "\". Details:\n" - << e.what(); + throw dmlc::Error(": Error when parsing target[\"" + key + "\"]" + e.what()); } - attrs[name] = val; } // set default attribute values if they do not exist for (const auto& kv : target->kind->key2default_) { @@ -476,125 +605,26 @@ Target Target::FromConfig(const Map& config_dict) { attrs[kv.first] = kv.second; } } - target->attrs = attrs; - return Target(target); -} - -/*! \brief Entry to hold the Target context stack. */ -struct TVMTargetThreadLocalEntry { - /*! \brief The current target context */ - std::stack context_stack; -}; - -/*! \brief Thread local store to hold the Target context stack. */ -using TVMTargetThreadLocalStore = dmlc::ThreadLocalStore; - -void Target::EnterWithScope() { - TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); - entry->context_stack.push(*this); -} - -void Target::ExitWithScope() { - TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); - CHECK(!entry->context_stack.empty()); - CHECK(entry->context_stack.top().same_as(*this)); - entry->context_stack.pop(); -} - -tvm::Target Target::Current(bool allow_not_defined) { - TVMTargetThreadLocalEntry* entry = TVMTargetThreadLocalStore::Get(); - if (entry->context_stack.size() > 0) { - return entry->context_stack.top(); + // do extra pre-processing + if (target->kind->preprocessor != nullptr) { + target->attrs = target->kind->preprocessor(Map(attrs)); + } else { + target->attrs = attrs; } - CHECK(allow_not_defined) - << "Target context required. 
Please set it by constructing a TargetContext"; - - return Target(); + return target; } -class Target::Internal { - public: - static void EnterScope(Target target) { target.EnterWithScope(); } - static void ExitScope(Target target) { target.ExitWithScope(); } -}; - -TVM_REGISTER_GLOBAL("target.TargetCreate").set_body([](TVMArgs args, TVMRetValue* ret) { - std::string name = args[0]; - std::vector options; - for (int i = 1; i < args.num_args; ++i) { - std::string arg = args[i]; - options.push_back(arg); - } - - *ret = Target::CreateTarget(name, options); -}); - -TVM_REGISTER_GLOBAL("target.EnterTargetScope").set_body_typed(Target::Internal::EnterScope); - -TVM_REGISTER_GLOBAL("target.ExitTargetScope").set_body_typed(Target::Internal::ExitScope); +/********** Registry **********/ -TVM_REGISTER_GLOBAL("target.GetCurrentTarget").set_body_typed(Target::Current); - -TVM_REGISTER_GLOBAL("target.TargetFromString").set_body_typed(Target::Create); - -TVM_REGISTER_GLOBAL("target.TargetFromConfig").set_body_typed(Target::FromConfig); - -TVM_REGISTER_GLOBAL("target.TargetExport") - .set_body_typed([](Target target) -> Map { return target->Export(); }); +TVM_REGISTER_GLOBAL("target.Target").set_body(TargetInternal::ConstructorDispatcher); +TVM_REGISTER_GLOBAL("target.TargetEnterScope").set_body_typed(TargetInternal::EnterScope); +TVM_REGISTER_GLOBAL("target.TargetExitScope").set_body_typed(TargetInternal::ExitScope); +TVM_REGISTER_GLOBAL("target.TargetCurrent").set_body_typed(Target::Current); +TVM_REGISTER_GLOBAL("target.TargetExport").set_body_typed(TargetInternal::Export); TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) - .set_dispatch([](const ObjectRef& node, ReprPrinter* p) { - const auto* target = node.as(); - CHECK(target); - p->stream << target->str(); + .set_dispatch([](const ObjectRef& obj, ReprPrinter* p) { + p->stream << Downcast(obj)->str(); }); -namespace target { -std::vector MergeOptions(std::vector opts, - const std::vector& new_opts) { - opts.insert(opts.end(), new_opts.begin(), new_opts.end()); - return opts; -} - -Target llvm(const std::vector& options) { - return Target::CreateTarget("llvm", options); -} - -Target cuda(const std::vector& options) { - return Target::CreateTarget("cuda", options); -} - -Target rocm(const std::vector& options) { - return Target::CreateTarget("rocm", options); -} - -Target opencl(const std::vector& options) { - return Target::CreateTarget("opencl", options); -} - -Target metal(const std::vector& options) { - return Target::CreateTarget("metal", options); -} - -Target mali(const std::vector& options) { - return Target::CreateTarget("opencl", MergeOptions(options, {"-device=mali"})); -} - -Target intel_graphics(const std::vector& options) { - return Target::CreateTarget( - "opencl", MergeOptions(options, {"-device=intel_graphics", "-thread_warp_size=16"})); -} - -Target stackvm(const std::vector& options) { - return Target::CreateTarget("stackvm", options); -} - -Target ext_dev(const std::vector& options) { - return Target::CreateTarget("ext_dev", options); -} - -Target hexagon(const std::vector& options) { - return Target::CreateTarget("hexagon", options); -} -} // namespace target } // namespace tvm diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 29f16925968d..efb9d16bb7b4 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -21,6 +21,9 @@ * \file src/target/target_kind.cc * \brief Target kind registry */ +#include +#include +#include #include #include @@ -32,11 +35,13 @@ namespace tvm { 
TVM_REGISTER_NODE_TYPE(TargetKindNode); TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) - .set_dispatch([](const ObjectRef& node, ReprPrinter* p) { - auto* op = static_cast(node.get()); - p->stream << op->name; + .set_dispatch([](const ObjectRef& obj, ReprPrinter* p) { + const TargetKind& kind = Downcast(obj); + p->stream << kind->name; }); +/********** Registry-related code **********/ + using TargetKindRegistry = AttrRegistry; TargetKindRegEntry& TargetKindRegEntry::RegisterOrGet(const String& target_kind_name) { @@ -52,178 +57,238 @@ const AttrRegistryMapContainerMap& TargetKind::GetAttrMapContainer( return TargetKindRegistry::Global()->GetAttrMap(attr_name); } -const TargetKind& TargetKind::Get(const String& target_kind_name) { +Optional TargetKind::Get(const String& target_kind_name) { const TargetKindRegEntry* reg = TargetKindRegistry::Global()->Get(target_kind_name); - CHECK(reg != nullptr) << "ValueError: TargetKind \"" << target_kind_name - << "\" is not registered"; + if (reg == nullptr) { + return NullOpt; + } return reg->kind_; } -// TODO(@junrushao1994): remove some redundant attributes +/********** Utility functions **********/ -TVM_REGISTER_TARGET_KIND("llvm") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") - .add_attr_option("system-lib") - .add_attr_option("runtime") - .add_attr_option("mcpu") +/*! + * \brief Extract a number from the string with the given prefix. + * For example, when `str` is "sm_20" and `prefix` is "sm_". + * This function first checks if `str` starts with `prefix`, + * then return the integer 20 after the `prefix` + * \param str The string to be extracted + * \param prefix The prefix to be checked + * \return An integer, the extracted number. -1 if the check fails + */ +static int ExtractIntWithPrefix(const std::string& str, const std::string& prefix) { + if (str.substr(0, prefix.size()) != prefix) { + return -1; + } + int result = 0; + for (size_t i = prefix.size(); i < str.size(); ++i) { + char c = str[i]; + if (!isdigit(c)) { + return -1; + } + result = result * 10 + c - '0'; + } + return result; +} + +/*! + * \brief Using TVM DeviceAPI to detect the device flag + * \param device The device to be detected + * \param flag The device flag to be detected + * \param val The detected value + * \return A boolean indicating if detection succeeds + */ +static bool DetectDeviceFlag(TVMContext device, runtime::DeviceAttrKind flag, TVMRetValue* val) { + using runtime::DeviceAPI; + DeviceAPI* api = DeviceAPI::Get(device, true); + // Check if compiled with the corresponding device api + if (api == nullptr) { + return false; + } + // Check if the device exists + api->GetAttr(device, runtime::kExist, val); + int exists = *val; + if (!exists) { + return false; + } + // Get the arch of the device + DeviceAPI::Get(device)->GetAttr(device, flag, val); + return true; +} + +void CheckOrSetAttr(Map* attrs, const String& name, const String& value) { + auto iter = attrs->find(name); + if (iter == attrs->end()) { + attrs->Set(name, value); + } else { + const auto* str = (*iter).second.as(); + CHECK(str != nullptr && GetRef(str) == value) + << "ValueError: Expects \"" << name << "\" to be \"" << value + << "\", but gets: " << (*iter).second; + } +} + +/********** Target kind attribute updaters **********/ + +/*! + * \brief Update the attributes in the LLVM NVPTX target. 
+ * \param attrs The original attributes + * \return The updated attributes + */ +Map UpdateNVPTXAttrs(Map attrs) { + CheckOrSetAttr(&attrs, "mtriple", "nvptx64-nvidia-cuda"); + // Update -mcpu=sm_xx + int arch; + if (attrs.count("mcpu")) { + // If -mcpu has been specified, validate the correctness + String mcpu = Downcast(attrs.at("mcpu")); + arch = ExtractIntWithPrefix(mcpu, "sm_"); + CHECK(arch != -1) << "ValueError: NVPTX target gets an invalid CUDA arch: -mcpu=" << mcpu; + } else { + // Use the compute version of the first CUDA GPU instead + TVMRetValue version; + if (!DetectDeviceFlag({kDLGPU, 0}, runtime::kComputeVersion, &version)) { + LOG(WARNING) << "Unable to detect CUDA version, default to \"-mcpu=sm_20\" instead"; + arch = 20; + } else { + arch = std::stod(version.operator std::string()) * 10 + 0.1; + } + attrs.Set("mcpu", String("sm_") + std::to_string(arch)); + } + return attrs; +} + +/*! + * \brief Update the attributes in the LLVM ROCm target. + * \param attrs The original attributes + * \return The updated attributes + */ +Map UpdateROCmAttrs(Map attrs) { + CheckOrSetAttr(&attrs, "mtriple", "amdgcn-amd-amdhsa-hcc"); + // Update -mcpu=gfx + int arch; + if (attrs.count("mcpu")) { + String mcpu = Downcast(attrs.at("mcpu")); + arch = ExtractIntWithPrefix(mcpu, "gfx"); + CHECK(arch != -1) << "ValueError: ROCm target gets an invalid GFX version: -mcpu=" << mcpu; + } else { + TVMRetValue version; + if (!DetectDeviceFlag({kDLROCM, 0}, runtime::kApiVersion, &version)) { + LOG(WARNING) << "Unable to detect ROCm version, default to \"-mcpu=gfx305\" instead"; + arch = 305; + } else { + arch = version.operator int(); + } + attrs.Set("mcpu", String("gfx") + std::to_string(arch)); + } + // Update -mattr before ROCm 3.5: + // Before ROCm 3.5 we needed code object v2, starting + // with 3.5 we need v3 (this argument disables v3) + if (arch < 305) { + Array mattr; + if (attrs.count("mattr")) { + mattr = Downcast>(attrs.at("mattr")); + } + mattr.push_back("-code-object-v3"); + attrs.Set("mattr", mattr); + } + return attrs; +} + +/********** Register Target kinds and attributes **********/ + +TVM_REGISTER_TARGET_KIND("llvm", kDLCPU) .add_attr_option>("mattr") + .add_attr_option("mcpu") .add_attr_option("mtriple") .add_attr_option("mfloat-abi") - .set_default_keys({"cpu"}) - .set_device_type(kDLCPU); - -TVM_REGISTER_TARGET_KIND("c") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") .add_attr_option("system-lib") .add_attr_option("runtime") - .set_default_keys({"cpu"}) - .set_device_type(kDLCPU); - -TVM_REGISTER_TARGET_KIND("cuda") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"cpu"}); + +TVM_REGISTER_TARGET_KIND("c", kDLCPU) .add_attr_option("system-lib") - .add_attr_option("max_num_threads", Integer(1024)) - .add_attr_option("thread_warp_size", Integer(32)) - .add_attr_option("mcpu") - .set_default_keys({"cuda", "gpu"}) - .set_device_type(kDLGPU); + .add_attr_option("runtime") + .set_default_keys({"cpu"}); -TVM_REGISTER_TARGET_KIND("nvptx") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") +TVM_REGISTER_TARGET_KIND("cuda", kDLGPU) + .add_attr_option("mcpu") + .add_attr_option("arch") .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(1024)) .add_attr_option("thread_warp_size", Integer(32)) + .set_default_keys({"cuda", "gpu"}); + 
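The cuda registration above now declares its attribute schema (mcpu, arch, system-lib, max_num_threads, thread_warp_size) together with its device type. A minimal sketch of how the unified constructor consumes this, assuming the Python-side tvm.target.Target wrapper forwards tag names, plain strings, and dict configs to the target.Target packed function registered in target.cc (the dict form mirrors the C++ Target(config) tests later in this patch):

    import tvm

    # A tag name is resolved through TargetTag::Get before any parsing,
    # so this yields the registered {"kind": "cuda", "arch": "sm_75"} config.
    gpu = tvm.target.Target("nvidia/rtx2080ti")

    # A config dict goes through TargetInternal::FromConfig; "kind" is mandatory
    # and every other key is validated against the kind's attribute schema.
    cpu = tvm.target.Target({"kind": "llvm", "mcpu": "skylake-avx512"})
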
+TVM_REGISTER_TARGET_KIND("nvptx", kDLGPU) .add_attr_option("mcpu") .add_attr_option("mtriple") + .add_attr_option("system-lib") + .add_attr_option("max_num_threads", Integer(1024)) + .add_attr_option("thread_warp_size", Integer(32)) .set_default_keys({"cuda", "gpu"}) - .set_device_type(kDLGPU); + .set_attrs_preprocessor(UpdateNVPTXAttrs); -TVM_REGISTER_TARGET_KIND("rocm") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") +TVM_REGISTER_TARGET_KIND("rocm", kDLROCM) + .add_attr_option("mcpu") + .add_attr_option("mtriple") .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(256)) .add_attr_option("thread_warp_size", Integer(64)) .set_default_keys({"rocm", "gpu"}) - .set_device_type(kDLROCM); + .set_attrs_preprocessor(UpdateROCmAttrs); -TVM_REGISTER_TARGET_KIND("opencl") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") +TVM_REGISTER_TARGET_KIND("opencl", kDLOpenCL) .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(256)) .add_attr_option("thread_warp_size") - .set_default_keys({"opencl", "gpu"}) - .set_device_type(kDLOpenCL); - -TVM_REGISTER_TARGET_KIND("metal") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"opencl", "gpu"}); + +TVM_REGISTER_TARGET_KIND("metal", kDLMetal) .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(256)) - .set_default_keys({"metal", "gpu"}) - .set_device_type(kDLMetal); - -TVM_REGISTER_TARGET_KIND("vulkan") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"metal", "gpu"}); + +TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan) .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(256)) - .set_default_keys({"vulkan", "gpu"}) - .set_device_type(kDLVulkan); - -TVM_REGISTER_TARGET_KIND("webgpu") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"vulkan", "gpu"}); + +TVM_REGISTER_TARGET_KIND("webgpu", kDLWebGPU) .add_attr_option("system-lib") .add_attr_option("max_num_threads", Integer(256)) - .set_default_keys({"webgpu", "gpu"}) - .set_device_type(kDLWebGPU); - -TVM_REGISTER_TARGET_KIND("sdaccel") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"webgpu", "gpu"}); + +TVM_REGISTER_TARGET_KIND("sdaccel", kDLOpenCL) .add_attr_option("system-lib") - .set_default_keys({"sdaccel", "hls"}) - .set_device_type(kDLOpenCL); - -TVM_REGISTER_TARGET_KIND("aocl") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"sdaccel", "hls"}); + +TVM_REGISTER_TARGET_KIND("aocl", kDLAOCL) .add_attr_option("system-lib") - .set_default_keys({"aocl", "hls"}) - .set_device_type(kDLAOCL); - -TVM_REGISTER_TARGET_KIND("aocl_sw_emu") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") + .set_default_keys({"aocl", "hls"}); + +TVM_REGISTER_TARGET_KIND("aocl_sw_emu", kDLAOCL) .add_attr_option("system-lib") - .set_default_keys({"aocl", "hls"}) - .set_device_type(kDLAOCL); - -TVM_REGISTER_TARGET_KIND("hexagon") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") - 
.add_attr_option("mcpu") + .set_default_keys({"aocl", "hls"}); + +TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon) .add_attr_option>("mattr") + .add_attr_option("mcpu") .add_attr_option("mtriple") - .add_attr_option>("llvm-options") .add_attr_option("system-lib") - .set_default_keys({"hexagon"}) - .set_device_type(kDLHexagon); - -TVM_REGISTER_TARGET_KIND("stackvm") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") - .add_attr_option("system-lib") - .set_device_type(kDLCPU); + .add_attr_option>("llvm-options") + .set_default_keys({"hexagon"}); -TVM_REGISTER_TARGET_KIND("ext_dev") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") - .add_attr_option("system-lib") - .set_device_type(kDLExtDev); +TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break + .add_attr_option("system-lib"); -TVM_REGISTER_TARGET_KIND("hybrid") - .add_attr_option>("keys") - .add_attr_option>("libs") - .add_attr_option("device") - .add_attr_option("model") - .add_attr_option("system-lib") - .set_device_type(kDLCPU); +TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev) // line break + .add_attr_option("system-lib"); + +TVM_REGISTER_TARGET_KIND("hybrid", kDLCPU) // line break + .add_attr_option("system-lib"); + +TVM_REGISTER_TARGET_KIND("composite", kDLCPU) + .add_attr_option("target_host") + .add_attr_option>("devices"); } // namespace tvm diff --git a/src/topi/schedule.cc b/src/topi/schedule.cc index 333833a4ce5d..ead803b84cf9 100644 --- a/src/topi/schedule.cc +++ b/src/topi/schedule.cc @@ -55,7 +55,7 @@ using namespace tvm; using namespace tvm::runtime; TVM_REGISTER_GLOBAL("topi.TEST_create_target").set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = tvm::Target::Create(args[0]); + *rv = tvm::Target(args[0].operator String()); }); /* Generic schedules */ diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index 48edfcd024f5..62c37f827cd5 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -50,12 +50,12 @@ TEST(BuildModule, Basic) { auto args = Array({A, B, C}); std::unordered_map binds; - auto target = target::llvm(); + auto target = Target("llvm"); auto lowered = lower(s, args, "func", binds); auto module = build(lowered, target, Target()); - auto mali_target = Target::Create("opencl -model=Mali-T860MP4@800Mhz -device=mali"); + auto mali_target = Target("opencl -model=Mali-T860MP4@800Mhz -device=mali"); CHECK_EQ(mali_target->kind->name, "opencl"); CHECK_EQ(mali_target->keys.size(), 3); CHECK_EQ(mali_target->keys[0], "mali"); @@ -88,8 +88,8 @@ TEST(BuildModule, Heterogeneous) { return; } - auto target_llvm = target::llvm(); - auto target_cuda = target::cuda(); + auto target_llvm = Target("llvm"); + auto target_cuda = Target("cuda"); // The shape of input tensors. 
const int n = 4; diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 5de4ada6cb7b..2b5eb961e6b2 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -113,7 +113,7 @@ TEST(Relay, BuildModule) { auto json_f = build_mod.GetFunction("get_graph_json", false); auto mod_f = build_mod.GetFunction("get_module", false); Map targets; - Target llvm_tgt = Target::Create("llvm"); + Target llvm_tgt = Target("llvm"); targets.Set(0, llvm_tgt); auto relay_mod = tvm::IRModule::FromExpr(func); build_f(relay_mod, targets, llvm_tgt); diff --git a/tests/cpp/relay_transform_sequential_test.cc b/tests/cpp/relay_transform_sequential_test.cc index 1a12aec7054d..f052d66fbc5a 100644 --- a/tests/cpp/relay_transform_sequential_test.cc +++ b/tests/cpp/relay_transform_sequential_test.cc @@ -95,7 +95,7 @@ TEST(Relay, Sequential) { pass_ctx->config.Set("relay.fallback_device_type", Integer(1)); { tvm::With ctx_scope(pass_ctx); - tvm::With tctx(tvm::Target::Create("llvm")); + tvm::With tctx(tvm::Target("llvm")); mod = seq(mod); } diff --git a/tests/cpp/target_test.cc b/tests/cpp/target_test.cc index e8748e63295a..b4c53ab84520 100644 --- a/tests/cpp/target_test.cc +++ b/tests/cpp/target_test.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -26,7 +27,7 @@ using namespace tvm; -TVM_REGISTER_TARGET_KIND("TestTargetKind") +TVM_REGISTER_TARGET_KIND("TestTargetKind", kDLCPU) .set_attr("Attr1", "Value1") .add_attr_option("my_bool") .add_attr_option>("your_names") @@ -34,7 +35,7 @@ TVM_REGISTER_TARGET_KIND("TestTargetKind") TEST(TargetKind, GetAttrMap) { auto map = tvm::TargetKind::GetAttrMap("Attr1"); - auto target_kind = tvm::TargetKind::Get("TestTargetKind"); + auto target_kind = tvm::TargetKind::Get("TestTargetKind").value(); std::string result = map[target_kind]; CHECK_EQ(result, "Value1"); } @@ -52,8 +53,8 @@ TEST(TargetCreation, NestedConfig) { }, }, }; - Target target = Target::FromConfig(config); - CHECK_EQ(target->kind, TargetKind::Get("TestTargetKind")); + Target target = Target(config); + CHECK_EQ(target->kind, TargetKind::Get("TestTargetKind").value()); CHECK_EQ(target->tag, ""); CHECK(target->keys.empty()); Bool my_bool = target->GetAttr("my_bool").value(); @@ -84,7 +85,7 @@ TEST(TargetCreationFail, UnrecognizedConfigOption) { }; bool failed = false; try { - Target::FromConfig(config); + Target tgt(config); } catch (...) { failed = true; } @@ -106,7 +107,7 @@ TEST(TargetCreationFail, TypeMismatch) { }; bool failed = false; try { - Target::FromConfig(config); + Target tgt(config); } catch (...) { failed = true; } @@ -127,7 +128,7 @@ TEST(TargetCreationFail, TargetKindNotFound) { }; bool failed = false; try { - Target::FromConfig(config); + Target tgt(config); } catch (...) 
{ failed = true; } @@ -140,8 +141,8 @@ TEST(TargetCreation, DeduplicateKeys) { {"keys", Array{"cpu", "arm_cpu"}}, {"device", String("arm_cpu")}, }; - Target target = Target::FromConfig(config); - CHECK_EQ(target->kind, TargetKind::Get("llvm")); + Target target = Target(config); + CHECK_EQ(target->kind, TargetKind::Get("llvm").value()); CHECK_EQ(target->tag, ""); CHECK_EQ(target->keys.size(), 2U); CHECK_EQ(target->keys[0], "cpu"); diff --git a/tests/cpp/utvm_runtime_standalone_test.cc b/tests/cpp/utvm_runtime_standalone_test.cc index 6f9577463310..39449ee215f2 100644 --- a/tests/cpp/utvm_runtime_standalone_test.cc +++ b/tests/cpp/utvm_runtime_standalone_test.cc @@ -89,7 +89,7 @@ TEST(MicroStandaloneRuntime, BuildModule) { auto mod_f = build_mod.GetFunction("get_module", false); Map targets; - Target llvm_tgt = Target::Create("llvm"); + Target llvm_tgt = Target("llvm"); targets.Set(0, llvm_tgt); build_f(func, targets, llvm_tgt); std::string json = json_f(); diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py index b43d273553e5..070348f7373d 100644 --- a/tests/python/contrib/test_ethosn/infrastructure.py +++ b/tests/python/contrib/test_ethosn/infrastructure.py @@ -89,7 +89,7 @@ def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1): with tvm.transform.PassContext(opt_level=3, config={ "relay.ext.ethos-n.options": {"variant": 0} }): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): if npu: f = relay.build_module.bind_params_by_name(mod["main"], params) mod = tvm.IRModule() @@ -165,7 +165,7 @@ def inference_result(checksum, outputs): def test_error(mod, params, err_msg): caught = None with tvm.transform.PassContext(opt_level=3): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): try: relay.build(mod, params) except tvm.error.TVMError as e: diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py index d2fb503c3975..6195bf6247d2 100644 --- a/tests/python/integration/test_ewise.py +++ b/tests/python/integration/test_ewise.py @@ -73,7 +73,7 @@ def check_device(device): if not tvm.testing.device_enabled(device): print("skip because %s is not enabled.." % device) return - target = tvm.target.create(device) + target = tvm.target.Target(device) if "cpu" not in target.keys: s[C].bind(bx, te.thread_axis("blockIdx.x")) s[C].bind(tx, te.thread_axis("threadIdx.x")) @@ -192,7 +192,7 @@ def check_device(device): if not tvm.testing.device_enabled(device): print("skip because %s is not enabled.." % device) return - target = tvm.target.create(device) + target = tvm.target.Target(device) if "cpu" not in target.keys: s[B].bind(bx, te.thread_axis("blockIdx.x")) s[B].bind(tx, te.thread_axis("threadIdx.x")) diff --git a/tests/python/integration/test_gemm.py b/tests/python/integration/test_gemm.py index 1b7d54e2177f..b2698f388b53 100644 --- a/tests/python/integration/test_gemm.py +++ b/tests/python/integration/test_gemm.py @@ -88,7 +88,7 @@ def check_device(device): print("skip because %s is not enabled.." % device) return - with tvm.target.create(device): + with tvm.target.Target(device): f = tvm.build(s, [A, B, C]) # launch the kernel. 
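The test updates above all follow one pattern: every tvm.target.create(device) becomes tvm.target.Target(device), and the context-manager form is unchanged. A small self-contained sketch of that usage, assuming an llvm-enabled build of TVM:

    import tvm
    from tvm import te

    target = tvm.target.Target("llvm")   # replaces the deprecated tvm.target.create("llvm")
    assert "cpu" in target.keys          # default keys come from the registered target kind

    n = te.var("n")
    A = te.placeholder((n,), name="A")
    B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)

    # The with-scope form used throughout the updated tests is unchanged.
    with tvm.target.Target("llvm"):
        f = tvm.build(s, [A, B])
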
diff --git a/tests/python/integration/test_winograd_nnpack.py b/tests/python/integration/test_winograd_nnpack.py index e6841ddc8132..d85e5295911d 100644 --- a/tests/python/integration/test_winograd_nnpack.py +++ b/tests/python/integration/test_winograd_nnpack.py @@ -64,7 +64,7 @@ def check_device(device): if not tvm.testing.device_enabled(device): print("Skipping %s becuase it is not enabled" % device) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.nn.conv2d(A, W, stride, padding, dilation, layout='NCHW', out_dtype=dtype) if add_bias: C = topi.add(C, bias) diff --git a/tests/python/relay/test_backend_compile_engine.py b/tests/python/relay/test_backend_compile_engine.py index 906882d774fd..0b0fd58064ff 100644 --- a/tests/python/relay/test_backend_compile_engine.py +++ b/tests/python/relay/test_backend_compile_engine.py @@ -81,7 +81,7 @@ def _create_record(task_name, dshape, wshape, target, cost): return (inp, result) def test_get_valid_implementations(): - target = tvm.target.create("llvm") + target = tvm.target.Target("llvm") def _get_impls(dshape, wshape): data = relay.var("data", shape=dshape) @@ -102,7 +102,7 @@ def _get_impls(dshape, wshape): assert len(impls) == 3 def test_select_implementation(): - target = tvm.target.create("llvm") + target = tvm.target.Target("llvm") def _select_impl(dshape, wshape, use_autotvm=False): data = relay.var("data", shape=dshape) diff --git a/tests/python/relay/test_backend_interpreter.py b/tests/python/relay/test_backend_interpreter.py index 41a07e4dac51..39786fd6963f 100644 --- a/tests/python/relay/test_backend_interpreter.py +++ b/tests/python/relay/test_backend_interpreter.py @@ -219,7 +219,7 @@ def test_tuple_passing(): mod = relay.transform.InferType()(mod) ctx = tvm.cpu() - target = tvm.target.create('llvm') + target = tvm.target.Target('llvm') exec = relay.create_executor(mod=mod, ctx=ctx, target=target) f = exec.evaluate(gv) # First use a Python tuple. diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index 3bd82b2a9cf4..2a1b983a6691 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -685,7 +685,7 @@ def before(): from tvm import topi def alter_conv2d(attrs, inputs, tinfos, out_type): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): return topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, out_type) @@ -1019,7 +1019,7 @@ def test_alter_layout_nhwc_arm(): """ Check that AlterOplayout does not alter NHWC data layout. """ def alter_conv2d(attrs, inputs, tinfos, out_type): from tvm import topi - with tvm.target.create("llvm -device=arm_cpu"): + with tvm.target.Target("llvm -device=arm_cpu"): return topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, out_type) # Check NHWC conversion. 
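The arm_cpu strings in these hunks exercise the new raw-string parser: attributes are written with leading dashes as -key=value, -key value, or a bare boolean flag, and each key is checked against the target kind's schema by FindTypeInfo. A short sketch of what that parsing is expected to yield; the keys check mirrors the mali/opencl assertions in tests/cpp/build_module_test.cc above:

    import tvm

    target = tvm.target.Target("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu")

    # "device" is added to the key list ahead of the kind's default keys,
    # so downstream lookups see "arm_cpu" before the default "cpu".
    assert target.keys[0] == "arm_cpu"
    assert "cpu" in target.keys
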
@@ -1080,7 +1080,7 @@ def update(self, target, workload, cfg): def alter_conv2d(attrs, inputs, tinfos, out_type): from tvm import topi - with tvm.target.create("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu"): + with tvm.target.Target("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu"): with Int8Fallback(): tmp = topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, out_type) return tmp diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py index bf9452a9db0a..3558ebce9b16 100644 --- a/tests/python/relay/test_pass_auto_quantize.py +++ b/tests/python/relay/test_pass_auto_quantize.py @@ -88,7 +88,7 @@ def test_calibrate_target(create_target): dataset = get_calibration_dataset(mod, "data") with relay.quantize.qconfig(calibrate_mode="kl_divergence"): if create_target: - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): relay.quantize.quantize(mod, params, dataset) else: # current_target = None diff --git a/tests/python/relay/test_pass_fold_constant.py b/tests/python/relay/test_pass_fold_constant.py index e0c813d4385d..a496e1d34c99 100644 --- a/tests/python/relay/test_pass_fold_constant.py +++ b/tests/python/relay/test_pass_fold_constant.py @@ -71,7 +71,7 @@ def expected(): return relay.Function([x], z) # the fold constant should work on any context. - with tvm.target.create("cuda"): + with tvm.target.Target("cuda"): zz = run_opt_pass(before(), transform.FoldConstant()) zexpected = run_opt_pass(expected(), transform.InferType()) assert tvm.ir.structural_equal(zz, zexpected) diff --git a/tests/python/relay/test_pass_manager.py b/tests/python/relay/test_pass_manager.py index 9245bbde3544..65ebf76a58b5 100644 --- a/tests/python/relay/test_pass_manager.py +++ b/tests/python/relay/test_pass_manager.py @@ -495,7 +495,7 @@ def expected(): mod = tvm.IRModule({"main": before()}) with tvm.transform.PassContext(opt_level=3): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): mod = seq(mod) zz = mod["main"] diff --git a/tests/python/relay/test_pass_qnn_legalize.py b/tests/python/relay/test_pass_qnn_legalize.py index c50c7c491c35..cf7210743ac9 100644 --- a/tests/python/relay/test_pass_qnn_legalize.py +++ b/tests/python/relay/test_pass_qnn_legalize.py @@ -128,12 +128,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms with fast Int8 support. ############################################################# # Check that Intel VNNI gets picked up. - with tvm.target.create('llvm -mcpu=skylake-avx512'): + with tvm.target.Target('llvm -mcpu=skylake-avx512'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn.conv2d" in legalized_mod.astext() # Since same dtype, there should not be any transformation - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert tvm.ir.structural_equal(mod, legalized_mod) @@ -141,12 +141,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms without fast Int8 support. ################################################################ # Older Intel versions. - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() # Older ARM vesions. 
- with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() @@ -156,12 +156,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms with fast Int8 support. ############################################################# # Check no transformation for Intel VNNI. - with tvm.target.create('llvm -mcpu=skylake-avx512'): + with tvm.target.Target('llvm -mcpu=skylake-avx512'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert tvm.ir.structural_equal(mod, legalized_mod) # ARM - so check that transformation has happened. - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn.conv2d" in legalized_mod.astext() @@ -169,19 +169,19 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms without fast Int8 support. ################################################################ # Older Intel versions. - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() # Older ARM vesions. - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() ########################################### # Check transformations for CUDA platforms. ########################################### - with tvm.target.create('cuda'): + with tvm.target.Target('cuda'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" in legalized_mod.astext() @@ -215,12 +215,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms with fast Int8 support. ############################################################# # Check that Intel VNNI gets picked up. - with tvm.target.create('llvm -mcpu=skylake-avx512'): + with tvm.target.Target('llvm -mcpu=skylake-avx512'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn.dense" in legalized_mod.astext() # Since same dtype, there should not be any transformation - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert tvm.ir.structural_equal(mod, legalized_mod) @@ -228,12 +228,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms without fast Int8 support. ################################################################ # Older Intel versions. - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() # Older ARM vesions. 
- with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() @@ -243,12 +243,12 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms with fast Int8 support. ############################################################# # Check no transformation for Intel VNNI. - with tvm.target.create('llvm -mcpu=skylake-avx512'): + with tvm.target.Target('llvm -mcpu=skylake-avx512'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert tvm.ir.structural_equal(mod, legalized_mod) # ARM - so check that transformation has happened. - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+v8.2a,+dotprod'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn.dense" in legalized_mod.astext() @@ -256,19 +256,19 @@ def _get_mod(data_dtype, kernel_dtype): # Check transformations for platforms without fast Int8 support. ################################################################ # Older Intel versions. - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() # Older ARM vesions. - with tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): + with tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" not in legalized_mod.astext() ########################################### # Check transformations for CUDA platforms. 
########################################### - with tvm.target.create('cuda'): + with tvm.target.Target('cuda'): legalized_mod = relay.qnn.transform.Legalize()(mod) assert 'cast' in legalized_mod.astext() and "qnn" in legalized_mod.astext() diff --git a/tests/python/topi/python/test_fifo_buffer.py b/tests/python/topi/python/test_fifo_buffer.py index 8e69a7639358..0df4822dfca0 100644 --- a/tests/python/topi/python/test_fifo_buffer.py +++ b/tests/python/topi/python/test_fifo_buffer.py @@ -49,7 +49,7 @@ def get_ref_data(): def check_device(device, ctx): print(' Running on target: {}'.format(device)) - with tvm.target.create(device): + with tvm.target.Target(device): out = topi.nn.fifo_buffer(data, buffer, axis=axis) s = tvm.topi.testing.get_injective_schedule(device)([out]) @@ -123,7 +123,7 @@ def check_device(device, ctx): conv2d_nchw, schedule_conv2d_nchw = tvm.topi.testing.get_conv2d_nchw_implement(device) - with tvm.target.create(device): + with tvm.target.Target(device): out = topi.nn.fifo_buffer(inc_input, context, axis=buffer_axis) s = tvm.topi.testing.get_injective_schedule(device)([out]) update_context = tvm.build(s, [inc_input, context, out], device, name='update_context') diff --git a/tests/python/topi/python/test_topi_batch_matmul.py b/tests/python/topi/python/test_topi_batch_matmul.py index c785c6d85108..c23716c88635 100644 --- a/tests/python/topi/python/test_topi_batch_matmul.py +++ b/tests/python/topi/python/test_topi_batch_matmul.py @@ -48,7 +48,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _batch_matmul_implement) out = fcompute(x, y) s = fschedule([out]) diff --git a/tests/python/topi/python/test_topi_bitserial_conv2d.py b/tests/python/topi/python/test_topi_bitserial_conv2d.py index 74a3a03d2b4b..91d6da2aa588 100644 --- a/tests/python/topi/python/test_topi_bitserial_conv2d.py +++ b/tests/python/topi/python/test_topi_bitserial_conv2d.py @@ -33,7 +33,7 @@ def verify_bitserial_conv2d_nchw(batch, in_size, in_channel, num_filter, kernel, input_dtype = 'uint32' out_dtype = 'int32' - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): A = te.placeholder((batch, in_channel, in_height, in_width), dtype=input_dtype, name='A') W = te.placeholder((num_filter, in_channel, kernel, kernel), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nchw(A, W, stride, padding, activation_bits, weight_bits, @@ -71,7 +71,7 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, input_dtype='uint32' out_dtype='int32' - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_dtype, name='A') W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_dtype, name='W') B = topi.x86.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, diff --git a/tests/python/topi/python/test_topi_bitserial_conv2d_rasp.py b/tests/python/topi/python/test_topi_bitserial_conv2d_rasp.py index 7b32c79e1761..659ee214f9d1 100644 --- a/tests/python/topi/python/test_topi_bitserial_conv2d_rasp.py +++ b/tests/python/topi/python/test_topi_bitserial_conv2d_rasp.py @@ -37,7 +37,7 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, out_dtype = 'int16' device = 'llvm -device=arm_cpu -model=bcm2837 -mtriple=armv7l-linux-gnueabihf -mattr=+neon' - with 
tvm.target.create(device): + with tvm.target.Target(device): A = te.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A') W = te.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W') B = topi.arm_cpu.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits, diff --git a/tests/python/topi/python/test_topi_bnn.py b/tests/python/topi/python/test_topi_bnn.py index ac1646005a1d..95d3e0457e0c 100644 --- a/tests/python/topi/python/test_topi_bnn.py +++ b/tests/python/topi/python/test_topi_bnn.py @@ -33,7 +33,7 @@ def verify_binary_dense(batch, in_dim, out_dim): bnn_B1 = te.placeholder(bnn_B.shape, dtype=bnn_B.dtype) bnn_C = topi.nn.binary_dense(bnn_A1, bnn_B1) # schedule - with tvm.target.create('llvm'): + with tvm.target.Target('llvm'): s1 = topi.x86.schedule_binarize_pack(bnn_A) s2 = topi.x86.schedule_binarize_pack(bnn_B) s3 = topi.x86.schedule_binary_dense(bnn_C) diff --git a/tests/python/topi/python/test_topi_broadcast.py b/tests/python/topi/python/test_topi_broadcast.py index 9826b88ee2db..b41f7f7a2b0f 100644 --- a/tests/python/topi/python/test_topi_broadcast.py +++ b/tests/python/topi/python/test_topi_broadcast.py @@ -33,7 +33,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="broadcast_to") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) @@ -81,7 +81,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(C) foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + ftopi.__name__) @@ -252,7 +252,7 @@ def test_apply( def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name=name) @@ -290,7 +290,7 @@ def test_apply( def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name=name) @@ -329,7 +329,7 @@ def test_apply( def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(C) foo = tvm.build(s, [A, B, C], device, name=name) diff --git a/tests/python/topi/python/test_topi_clip.py b/tests/python/topi/python/test_topi_clip.py index 70af1f84cae0..8f018b5f50d9 100644 --- a/tests/python/topi/python/test_topi_clip.py +++ b/tests/python/topi/python/test_topi_clip.py @@ -40,7 +40,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/topi/python/test_topi_conv1d.py b/tests/python/topi/python/test_topi_conv1d.py index b50aa56c74fe..77a37ff475b5 100644 --- a/tests/python/topi/python/test_topi_conv1d.py +++ b/tests/python/topi/python/test_topi_conv1d.py @@ -78,7 +78,7 @@ def check_device(device, 
ctx): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_ncw_implement) else: fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_nwc_implement) - with tvm.target.create(device): + with tvm.target.Target(device): B = fcompute(A, W, stride, padding, dilation, 'float32') s = fschedule([B]) diff --git a/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py b/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py index fc5819be3330..d5755796e38d 100644 --- a/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py +++ b/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py @@ -51,7 +51,7 @@ def get_ref_data(): def check_device(device, ctx): ctx = tvm.context(device, 0) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_transpose_ncw_implement) B = fcompute(A, W, stride, padding, A.dtype, output_padding) C = topi.nn.relu(B) diff --git a/tests/python/topi/python/test_topi_conv2d_NCHWc.py b/tests/python/topi/python/test_topi_conv2d_NCHWc.py index 604d09d6837a..b1df3585888a 100644 --- a/tests/python/topi/python/test_topi_conv2d_NCHWc.py +++ b/tests/python/topi/python/test_topi_conv2d_NCHWc.py @@ -97,7 +97,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.x86.conv2d_NCHWc(A, W, (stride, stride), padding, (dilation, dilation), 'NCHW%dc'%ic_block, diff --git a/tests/python/topi/python/test_topi_conv2d_hwcn.py b/tests/python/topi/python/test_topi_conv2d_hwcn.py index 04f34b6ea673..71a83fc14f09 100644 --- a/tests/python/topi/python/test_topi_conv2d_hwcn.py +++ b/tests/python/topi/python/test_topi_conv2d_hwcn.py @@ -63,7 +63,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_hwcn_implement) t_conv = fcompute(A, W, stride, padding, dilation) t_bias = topi.add(t_conv, B) diff --git a/tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py b/tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py index ea1aee1cae66..f0eb2d213c03 100644 --- a/tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py +++ b/tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py @@ -96,7 +96,7 @@ def check_device(device): print("skip because gpu does not support Tensor Cores") return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = topi.testing.dispatch(device, _conv2d_hwnc_tensorcore_implement) C = fcompute(A, W, stride, padding, dilation, dtype, 'int32') s = fschedule([C]) diff --git a/tests/python/topi/python/test_topi_conv2d_int8.py b/tests/python/topi/python/test_topi_conv2d_int8.py index c18946b2b933..8082044183a5 100644 --- a/tests/python/topi/python/test_topi_conv2d_int8.py +++ b/tests/python/topi/python/test_topi_conv2d_int8.py @@ -50,7 +50,7 @@ def compile_conv2d_NHWC_gemm_int8_arm(batch, in_channel, in_size, num_filter, ke print("Skip because %s is not enabled" % device) return print("Compiling on arm AArch64 target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): assert is_aarch64_arm(), "AArch64 target not recognized" C = topi.arm_cpu.compute_conv2d_NHWC_quantized(A, W, (stride, stride), padding, @@ -133,7 
+133,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.arm_cpu.compute_conv2d_NHWC_quantized(A, W, (stride, stride), padding, (dilation, dilation), dtype) if add_bias: @@ -232,7 +232,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.cuda.conv2d_NCHWc_int8(A, W, (stride, stride), padding, (dilation, dilation), 'NCHW', dtype) if add_bias: @@ -302,7 +302,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.cuda.conv2d_nchw_int8(A, W, (stride, stride), padding, (dilation, dilation), dtype) if add_bias: diff --git a/tests/python/topi/python/test_topi_conv2d_nchw.py b/tests/python/topi/python/test_topi_conv2d_nchw.py index a306e3edae11..033869f00738 100644 --- a/tests/python/topi/python/test_topi_conv2d_nchw.py +++ b/tests/python/topi/python/test_topi_conv2d_nchw.py @@ -73,7 +73,7 @@ def check_device(device): else: fcompute, fschedule = tvm.topi.testing.get_conv2d_nchw_implement(device) - with tvm.target.create(device): + with tvm.target.Target(device): if "cudnn" in device: C = fcompute(A, W, (stride, stride), padding, (dilation, dilation), 1, "NCHW", dtype) else: diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc.py b/tests/python/topi/python/test_topi_conv2d_nhwc.py index 29b8634869ff..7482d64bce9c 100644 --- a/tests/python/topi/python/test_topi_conv2d_nhwc.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc.py @@ -60,7 +60,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_implement) B = fcompute(A, W, stride, padding, dilation, dtype) s = fschedule([B]) diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc_pack_int8.py b/tests/python/topi/python/test_topi_conv2d_nhwc_pack_int8.py index 019dd30fda2f..dc9599c2c97e 100644 --- a/tests/python/topi/python/test_topi_conv2d_nhwc_pack_int8.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc_pack_int8.py @@ -56,7 +56,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): B = topi.nn.conv2d(A, W, stride, padding, dilation, layout='NHWC', out_dtype="int32") s = topi.x86.schedule_conv2d_nhwc_pack_int8([B]) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc_tensorcore.py b/tests/python/topi/python/test_topi_conv2d_nhwc_tensorcore.py index fb0167a1e045..1223b0e216c3 100644 --- a/tests/python/topi/python/test_topi_conv2d_nhwc_tensorcore.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc_tensorcore.py @@ -78,7 +78,7 @@ def check_device(device): print("skip because gpu does not support Tensor Cores") return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_tensorcore_implement) C = fcompute(A, W, stride, padding, dilation, 'float32') if add_bias: diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc_winograd.py b/tests/python/topi/python/test_topi_conv2d_nhwc_winograd.py index cbcc32d0b425..0bb0e69d683e 100644 
--- a/tests/python/topi/python/test_topi_conv2d_nhwc_winograd.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc_winograd.py @@ -79,7 +79,7 @@ def get_ref_data(): def check_device(device): ctx = tvm.context(device, 0) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): if bgemm == "direct": fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_winograd_direct) diff --git a/tests/python/topi/python/test_topi_conv2d_transpose_nchw.py b/tests/python/topi/python/test_topi_conv2d_transpose_nchw.py index 8c30f441e622..89928f20ae97 100644 --- a/tests/python/topi/python/test_topi_conv2d_transpose_nchw.py +++ b/tests/python/topi/python/test_topi_conv2d_transpose_nchw.py @@ -59,7 +59,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_transpose_nchw_implement) B = fcompute(A, W, [stride_height, stride_width], diff --git a/tests/python/topi/python/test_topi_conv2d_winograd.py b/tests/python/topi/python/test_topi_conv2d_winograd.py index 674590a7fa0f..bb9fdeee63ef 100644 --- a/tests/python/topi/python/test_topi_conv2d_winograd.py +++ b/tests/python/topi/python/test_topi_conv2d_winograd.py @@ -75,7 +75,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nchw_winograd_implement) C = fcompute(A, W, stride, padding, dilation, dtype) if add_bias: diff --git a/tests/python/topi/python/test_topi_conv3d_ncdhw.py b/tests/python/topi/python/test_topi_conv3d_ncdhw.py index 319fb723da76..73de19cd82b0 100644 --- a/tests/python/topi/python/test_topi_conv3d_ncdhw.py +++ b/tests/python/topi/python/test_topi_conv3d_ncdhw.py @@ -68,7 +68,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ncdhw_implement) - with tvm.target.create(device): + with tvm.target.Target(device): C = fcompute(A, W, (stride, stride, stride), padding, (dilation, dilation, dilation), dtype) if add_bias: diff --git a/tests/python/topi/python/test_topi_conv3d_ndhwc.py b/tests/python/topi/python/test_topi_conv3d_ndhwc.py index 7e330e77a365..82216c8b6d69 100644 --- a/tests/python/topi/python/test_topi_conv3d_ndhwc.py +++ b/tests/python/topi/python/test_topi_conv3d_ndhwc.py @@ -60,7 +60,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ndhwc_implement) - with tvm.target.create(device): + with tvm.target.Target(device): B = fcompute(A, W, stride, padding, dilation, dtype) s = fschedule([B]) ctx = tvm.context(device, 0) diff --git a/tests/python/topi/python/test_topi_conv3d_ndhwc_tensorcore.py b/tests/python/topi/python/test_topi_conv3d_ndhwc_tensorcore.py index 9f92efa54222..3a6d2445911c 100644 --- a/tests/python/topi/python/test_topi_conv3d_ndhwc_tensorcore.py +++ b/tests/python/topi/python/test_topi_conv3d_ndhwc_tensorcore.py @@ -73,7 +73,7 @@ def get_ref_data(): def check_device(device): ctx = tvm.context(device, 0) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, 
_conv3d_ndhwc_tensorcore_implement) C = fcompute(A, W, stride, padding, dilation, 'float32') if add_bias: diff --git a/tests/python/topi/python/test_topi_conv3d_transpose_ncdhw.py b/tests/python/topi/python/test_topi_conv3d_transpose_ncdhw.py index 25d9b725dedf..7a0121db5dc5 100644 --- a/tests/python/topi/python/test_topi_conv3d_transpose_ncdhw.py +++ b/tests/python/topi/python/test_topi_conv3d_transpose_ncdhw.py @@ -56,7 +56,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_transpose_ncdhw_implement) B = fcompute(A, W, [stride_depth, stride_height, stride_width], diff --git a/tests/python/topi/python/test_topi_conv3d_winograd.py b/tests/python/topi/python/test_topi_conv3d_winograd.py index a6e528c85e33..e049aec4b39d 100644 --- a/tests/python/topi/python/test_topi_conv3d_winograd.py +++ b/tests/python/topi/python/test_topi_conv3d_winograd.py @@ -83,7 +83,7 @@ def check_device(device): return print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ncdhw_implement) - with tvm.target.create(device): + with tvm.target.Target(device): C = fcompute(A, W, (stride, stride, stride), padding, (dilation, dilation, dilation), dtype) if add_bias: diff --git a/tests/python/topi/python/test_topi_correlation.py b/tests/python/topi/python/test_topi_correlation.py index 81063925ebc3..b7928301974f 100644 --- a/tests/python/topi/python/test_topi_correlation.py +++ b/tests/python/topi/python/test_topi_correlation.py @@ -53,7 +53,7 @@ def check_device(device, ctx): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch( device, _correlation_implement) - with tvm.target.create(device): + with tvm.target.Target(device): C = fcompute(A, B, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply) s = fschedule([C]) diff --git a/tests/python/topi/python/test_topi_deformable_conv2d.py b/tests/python/topi/python/test_topi_deformable_conv2d.py index 5d361b4d02b4..3f3eca6d150a 100644 --- a/tests/python/topi/python/test_topi_deformable_conv2d.py +++ b/tests/python/topi/python/test_topi_deformable_conv2d.py @@ -67,7 +67,7 @@ def check_device(device): return print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _deformable_conv2d_implement) - with tvm.target.create(device): + with tvm.target.Target(device): C = fcompute(A, Offset, W, stride, padding, dilation, deformable_groups, groups, dtype) s = fschedule([C]) diff --git a/tests/python/topi/python/test_topi_dense.py b/tests/python/topi/python/test_topi_dense.py index e6530e751e1b..94e367017ed3 100644 --- a/tests/python/topi/python/test_topi_dense.py +++ b/tests/python/topi/python/test_topi_dense.py @@ -61,7 +61,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement): - with tvm.target.create(device): + with tvm.target.Target(device): D = fcompute(A, B, C if use_bias else None) D = topi.nn.relu(D) s = fschedule([D]) @@ -106,7 +106,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): D = topi.cuda.dense_int8(A, B, C if use_bias else None, out_dtype) D = topi.nn.relu(D) s = topi.cuda.schedule_dense_int8([D]) diff --git 
a/tests/python/topi/python/test_topi_dense_tensorcore.py b/tests/python/topi/python/test_topi_dense_tensorcore.py index 642d124a86a5..11dc407e783c 100644 --- a/tests/python/topi/python/test_topi_dense_tensorcore.py +++ b/tests/python/topi/python/test_topi_dense_tensorcore.py @@ -55,7 +55,7 @@ def check_device(device): ctx = tvm.context(device, 0) print("Running on target: %s" % device) for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement): - with tvm.target.create(device): + with tvm.target.Target(device): D = fcompute(A, B, C if use_bias else None) D = topi.nn.relu(D) s = fschedule([D]) diff --git a/tests/python/topi/python/test_topi_depth_to_space.py b/tests/python/topi/python/test_topi_depth_to_space.py index c94981235522..182f0993bf01 100644 --- a/tests/python/topi/python/test_topi_depth_to_space.py +++ b/tests/python/topi/python/test_topi_depth_to_space.py @@ -51,7 +51,7 @@ def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, la def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py index bc804cb978f9..f9c0a1c8d1b7 100644 --- a/tests/python/topi/python/test_topi_depthwise_conv2d.py +++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py @@ -75,7 +75,7 @@ def check_device(device, ctx): impl_list.append((topi.x86.depthwise_conv2d_nchw, topi.x86.schedule_depthwise_conv2d_nchw)) for fcompute, fschedule in impl_list: - with tvm.target.create(device): + with tvm.target.Target(device): # declare DepthwiseConv2d = fcompute(Input, Filter, (stride_h, stride_w), padding_args, dilation, dtype) @@ -170,7 +170,7 @@ def check_device(device, ctx): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _depthwise_conv2d_nhwc_implement) - with tvm.target.create(device): + with tvm.target.Target(device): # declare DepthwiseConv2d = fcompute(Input, Filter, (stride_h, stride_w), padding_args, dilation, dtype) @@ -294,7 +294,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): # declare DepthwiseConv2d = topi.x86.depthwise_conv2d_NCHWc(Input, Filter, (stride_h, stride_w), diff --git a/tests/python/topi/python/test_topi_group_conv2d.py b/tests/python/topi/python/test_topi_group_conv2d.py index 2eea4b0ca43b..959b15c8b0b4 100644 --- a/tests/python/topi/python/test_topi_group_conv2d.py +++ b/tests/python/topi/python/test_topi_group_conv2d.py @@ -77,7 +77,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _group_conv2d_nchw_implement) C = fcompute(A, W, stride, padding, dilation, groups, dtype) if add_bias: @@ -157,7 +157,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.cuda.group_conv2d_NCHWc_int8(A, W, stride, padding, dilation, groups, dtype) if add_bias: C = topi.add(C, bias) diff --git a/tests/python/topi/python/test_topi_group_conv2d_NCHWc_int8.py 
b/tests/python/topi/python/test_topi_group_conv2d_NCHWc_int8.py index c5eebf411634..20c449028f4b 100644 --- a/tests/python/topi/python/test_topi_group_conv2d_NCHWc_int8.py +++ b/tests/python/topi/python/test_topi_group_conv2d_NCHWc_int8.py @@ -79,7 +79,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): C = topi.x86.conv2d_NCHWc(A, W, (stride, stride), (padding, padding), (dilation, dilation), 'NCHW%dc'%ic_block, diff --git a/tests/python/topi/python/test_topi_image.py b/tests/python/topi/python/test_topi_image.py index 2fafe6c131ea..207dfe712c1b 100644 --- a/tests/python/topi/python/test_topi_image.py +++ b/tests/python/topi/python/test_topi_image.py @@ -48,7 +48,7 @@ def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width, def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) @@ -112,7 +112,7 @@ def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth, def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) @@ -170,7 +170,7 @@ def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, extrapolation_value) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out) tvm_images = tvm.nd.array(np_images, ctx) tvm_boxes = tvm.nd.array(np_boxes, ctx) @@ -217,7 +217,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out) tvm_data = tvm.nd.array(data_np, ctx) tvm_out = tvm.nd.empty(out_np.shape, dtype, ctx) @@ -254,7 +254,7 @@ def get_ref_data(): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out) tvm_data = tvm.nd.array(data_np, ctx) tvm_grid = tvm.nd.array(grid_np, ctx) diff --git a/tests/python/topi/python/test_topi_lrn.py b/tests/python/topi/python/test_topi_lrn.py index 13dcc715f9f5..b753ca1c24bd 100644 --- a/tests/python/topi/python/test_topi_lrn.py +++ b/tests/python/topi/python/test_topi_lrn.py @@ -46,7 +46,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _lrn_schedule) s = s_func([B]) ctx = tvm.context(device, 0) diff --git a/tests/python/topi/python/test_topi_math.py b/tests/python/topi/python/test_topi_math.py index 9a7bc6ea004f..f068c97e0a06 100644 --- a/tests/python/topi/python/test_topi_math.py +++ b/tests/python/topi/python/test_topi_math.py @@ -59,7 +59,7 @@ def test_apply( def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = 
tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name=name) a = tvm.nd.array(a_np, ctx) @@ -95,7 +95,7 @@ def test_isnan( def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="isnan") a = tvm.nd.array(a_np, ctx) @@ -121,7 +121,7 @@ def test_infiniteness_ops(topi_op, ref_op, name): b_np = ref_op(a_np) def check_device(device, ctx): - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name=name) a = tvm.nd.array(a_np, ctx) @@ -172,7 +172,7 @@ def verify(from_dtype, to_dtype, low=-100, high=100): for device, ctx in tvm.testing.enabled_targets(): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device) a = tvm.nd.array(a_np, ctx) @@ -211,7 +211,7 @@ def check_device(device): if not tvm.testing.device_enabled(device): print("Skip because %s is not enabled" % device) return - with tvm.target.create(device): + with tvm.target.Target(device): s = topi.generic.schedule_injective(B) func = tvm.build(s, [A, B], device, name=name) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/topi/python/test_topi_pooling.py b/tests/python/topi/python/test_topi_pooling.py index 2f3a38c3df5e..30b532e43f00 100644 --- a/tests/python/topi/python/test_topi_pooling.py +++ b/tests/python/topi/python/test_topi_pooling.py @@ -94,7 +94,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_ def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _pool_schedule) s = s_func(B, layout) @@ -146,7 +146,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _pool_grad_schedule) s = s_func(PoolGrad) @@ -219,7 +219,7 @@ def verify_global_pool(dshape, pool_type, layout='NCHW'): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule) if device == "cuda": s = s_func(B, layout) @@ -262,7 +262,7 @@ def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="floa def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule) if device == "cuda": s = s_func(out, layout) @@ -320,7 +320,7 @@ def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type, def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _pool_schedule) s = s_func(B, layout) @@ -372,7 +372,7 @@ def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type, def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = 
tvm.topi.testing.dispatch(device, _pool_schedule) s = s_func(B, layout) diff --git a/tests/python/topi/python/test_topi_reduce.py b/tests/python/topi/python/test_topi_reduce.py index 33706e6cdc3c..c64624fa68ff 100644 --- a/tests/python/topi/python/test_topi_reduce.py +++ b/tests/python/topi/python/test_topi_reduce.py @@ -70,7 +70,7 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32") def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_reduce_schedule(device)(B) foo = tvm.build(s, [A, B], device, name=type) diff --git a/tests/python/topi/python/test_topi_relu.py b/tests/python/topi/python/test_topi_relu.py index 74425386bcbe..21e06b5257cb 100644 --- a/tests/python/topi/python/test_topi_relu.py +++ b/tests/python/topi/python/test_topi_relu.py @@ -38,7 +38,7 @@ def check_device(device, ctx): print("Skip because %s does not have fp16 support" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_elemwise_schedule(device)(B) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/topi/python/test_topi_reorg.py b/tests/python/topi/python/test_topi_reorg.py index 2b49461bf8de..95e028d51a87 100644 --- a/tests/python/topi/python/test_topi_reorg.py +++ b/tests/python/topi/python/test_topi_reorg.py @@ -52,7 +52,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _reorg_schedule) s = s_func([B]) a = tvm.nd.array(a_np, ctx) diff --git a/tests/python/topi/python/test_topi_softmax.py b/tests/python/topi/python/test_topi_softmax.py index 46322ba38e50..5107b6468097 100644 --- a/tests/python/topi/python/test_topi_softmax.py +++ b/tests/python/topi/python/test_topi_softmax.py @@ -35,7 +35,7 @@ def check_device(A, B, a_np, b_np, device, ctx, name): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s_func = tvm.topi.testing.dispatch(device, _softmax_schedule) s = s_func(B) diff --git a/tests/python/topi/python/test_topi_sort.py b/tests/python/topi/python/test_topi_sort.py index 603d2ef51851..e33531fd4d69 100644 --- a/tests/python/topi/python/test_topi_sort.py +++ b/tests/python/topi/python/test_topi_sort.py @@ -58,7 +58,7 @@ def check_device(device): return ctx = tvm.context(device, 0) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _argsort_implement) out = fcompute(data, axis=axis, is_ascend=is_ascend) s = fschedule(out) @@ -102,7 +102,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _topk_implement) outs = fcompute(data, k, axis, ret_type, is_ascend, dtype) outs = outs if isinstance(outs, list) else [outs] diff --git a/tests/python/topi/python/test_topi_space_to_depth.py b/tests/python/topi/python/test_topi_space_to_depth.py index 509678513ddd..504c359c936a 100644 --- a/tests/python/topi/python/test_topi_space_to_depth.py +++ b/tests/python/topi/python/test_topi_space_to_depth.py @@ -50,7 +50,7 @@ def 
verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, la def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) diff --git a/tests/python/topi/python/test_topi_sparse.py b/tests/python/topi/python/test_topi_sparse.py index f0e701b13047..dbab292f38e0 100644 --- a/tests/python/topi/python/test_topi_sparse.py +++ b/tests/python/topi/python/test_topi_sparse.py @@ -309,7 +309,7 @@ def check_device(device): return print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _sparse_dense_implement) - with tvm.target.create(device): + with tvm.target.Target(device): Y = fcompute(X, W_data, W_indices, W_indptr) if use_relu: Y = topi.nn.relu(Y) @@ -359,7 +359,7 @@ def check_device(device): return print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _sparse_dense_implement) - with tvm.target.create(device): + with tvm.target.Target(device): Y = fcompute(X, W_data, W_indices, W_indptr) s = fschedule([Y]) func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y]) diff --git a/tests/python/topi/python/test_topi_tensor.py b/tests/python/topi/python/test_topi_tensor.py index 53e48bf9da18..7052a7ebc201 100644 --- a/tests/python/topi/python/test_topi_tensor.py +++ b/tests/python/topi/python/test_topi_tensor.py @@ -96,7 +96,7 @@ def check_device(device): if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version): print("Skip because gpu does not have fp16 support") return - with tvm.target.create(device): + with tvm.target.Target(device): ctx = tvm.context(device, 0) A = te.placeholder((n, m), name='A', dtype=dtype) B = te.compute((n, m), lambda i, j: diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 12e50b49a307..a061ba9d3ce6 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -30,7 +30,7 @@ def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): B = topi.expand_dims(A, axis, num_newaxis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="expand_dims") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) @@ -52,7 +52,7 @@ def check_device(device, ctx): print("Skip because %s does not have fp16 support" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_elemwise_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="reinterpret") data_npy = generator(in_shape).astype(in_dtype) @@ -71,7 +71,7 @@ def verify_transpose(in_shape, axes): B = topi.transpose(A, axes) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="transpose") data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype) @@ -90,7 +90,7 @@ def verify_reshape(src_shape, dst_shape): B = topi.reshape(A, dst_shape) def check_device(device, ctx): print("Running on target: %s" % device) - 
with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="reshape") data_npy = np.random.normal(size=src_shape).astype(A.dtype) @@ -109,7 +109,7 @@ def verify_squeeze(src_shape, axis): B = topi.squeeze(A, axis=axis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="squeeze") @@ -132,7 +132,7 @@ def get_concat_schedule(target): "arm_cpu": topi.arm_cpu.schedule_concatenate, } if isinstance(target, str): - target = tvm.target.create(target) + target = tvm.target.Target(target) for key in target.keys: if key in schedule_map: return schedule_map[key] @@ -144,7 +144,7 @@ def get_concat_schedule(target): out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = get_concat_schedule(device)(out_tensor) foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate") @@ -165,7 +165,7 @@ def verify_stack(shapes, axis): out_tensor = topi.stack(tensor_l, axis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(out_tensor) foo = tvm.build(s, tensor_l + [out_tensor], device, name="stack") @@ -185,7 +185,7 @@ def verify_split(src_shape, indices_or_sections, axis): tensor_l = topi.split(A, indices_or_sections, axis=axis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(tensor_l) foo = tvm.build(s, [A] + list(tensor_l), device, name="split") @@ -241,7 +241,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="reverse") @@ -266,7 +266,7 @@ def verify_reverse_sequence(in_data, seq_lengths, batch_axis, seq_axis, ref_res) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(C) foo = tvm.build(s, [A, B, C], device, name="reverse_sequence") @@ -348,7 +348,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out_tensor) foo = tvm.build(s, [A] + [indices] + [out_tensor] , device, name="take") @@ -383,7 +383,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="stride_slice") @@ -415,7 +415,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = 
tvm.topi.testing.get_injective_schedule(device)(B) if strides is not None: @@ -454,7 +454,7 @@ def verify_gather(data, axis, indices): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out_tensor) func = tvm.build(s, [var_data, var_indices, out_tensor] , device, name="gather") @@ -478,7 +478,7 @@ def verify_gather_nd(src_shape, indices_src, indices_dtype): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(out_tensor) func = tvm.build(s, [A, indices, out_tensor] , device, name="take") @@ -513,7 +513,7 @@ def verify_arange(start, stop, step): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(A) f = tvm.build(s, [A], device, name="arange") a_nd = tvm.nd.empty(a_np.shape, dtype='float32', ctx=ctx) @@ -528,7 +528,7 @@ def verify_repeat(in_shape, repeats, axis): B = topi.repeat(A, repeats, axis) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="repeat") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) @@ -546,7 +546,7 @@ def verify_tile(in_shape, reps): B = topi.tile(A, reps) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(B) foo = tvm.build(s, [A, B], device, name="tile") data_npy = np.random.uniform(size=in_shape).astype(A.dtype) @@ -567,7 +567,7 @@ def verify_where(in_shape): C = topi.where(Cond, A, B) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_broadcast_schedule(device)(C) f = tvm.build(s, [Cond, A, B, C], device, name="where") cond_npy = np.random.uniform(low=-1, high=1, size=in_shape).astype(dtype) @@ -591,7 +591,7 @@ def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype): one_hot_result = topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype) def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(one_hot_result) fn = tvm.build(s, [indices, one_hot_result], device, name="one_hot") indices_npy = np.random.randint(0, depth, size=indices_shape).astype(indices.dtype) @@ -620,7 +620,7 @@ def verify_unravel_index(indices, shape, dtype): def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(Z) foo = tvm.build(s, [X, Y, Z], device, name="unravel_index") @@ -652,7 +652,7 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_ def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(D) foo = tvm.build(s, args + [D], device, name="sparse_to_dense") @@ -681,7 
+681,7 @@ def verify_matrix_set_diag(input_shape, dtype): def check_device(device, ctx): ctx = tvm.context(device, 0) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(matrix_set_diag_result) fn = tvm.build(s, [input, diagonal, matrix_set_diag_result], device, name="matrix_set_diag") input_npy = np.random.randint(-100, 100, size=input_shape).astype(dtype) @@ -775,7 +775,7 @@ def test_squeeze(): for device in ['cuda', 'opencl']: ctx = tvm.context(device, 0) if tvm.testing.device_enabled(device): - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(C) func = tvm.build(s, [A, C]) a = tvm.nd.array(np.array((1, 2)).astype('float32'), ctx=ctx) @@ -915,7 +915,7 @@ def check_device(device, ctx): tvm_input = tvm.nd.array(input, ctx) tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=B.dtype) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) f = tvm.build(s, [A, B], device, name="layout_transform") f(tvm_input, tvm_output) @@ -939,7 +939,7 @@ def check_device(device, ctx): tvm_input = tvm.nd.array(input, ctx) tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=dtype) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) f = tvm.build(s, [A, B], device, name="shape") f(tvm_input, tvm_output) @@ -968,7 +968,7 @@ def check_device(device, ctx): tvm_B = tvm.nd.array(B_data, ctx) tvm_C = tvm.nd.empty(in_shape, ctx=ctx, dtype="float32") print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(C) f = tvm.build(s, [A, B, C], device, name="SequenceMask") f(tvm_A, tvm_B, tvm_C) @@ -990,7 +990,7 @@ def check_device(device, ctx): tvm_input = tvm.nd.array(input, ctx=ctx) tvm_output = tvm.nd.empty((), ctx=ctx, dtype=B.dtype) print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) f = tvm.build(s, [A, B], device, name="ndarray_size") f(tvm_input, tvm_output) @@ -1004,7 +1004,7 @@ def check_device(device, ctx): def test_where_fusion(): """integration test that where and zeros should be properly inlined""" def check_device(device, ctx): - with tvm.target.create(device): + with tvm.target.Target(device): print("Running on target: %s" % device) conv2d_compute, conv2d_schedule = tvm.topi.testing.get_conv2d_nchw_implement(device) data = te.placeholder((2, 1, 2, 4), 'int8', 'data') diff --git a/tests/python/topi/python/test_topi_upsampling.py b/tests/python/topi/python/test_topi_upsampling.py index 7861a29f7950..09ca58d0813d 100644 --- a/tests/python/topi/python/test_topi_upsampling.py +++ b/tests/python/topi/python/test_topi_upsampling.py @@ -58,7 +58,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w, def check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) @@ -138,7 +138,7 @@ def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_ def 
check_device(device, ctx): print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): s = tvm.topi.testing.get_injective_schedule(device)(B) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx) diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py index 691dcdfaf926..22f4683eb715 100644 --- a/tests/python/topi/python/test_topi_vision.py +++ b/tests/python/topi/python/test_topi_vision.py @@ -94,7 +94,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _get_valid_counts_implement) data = te.placeholder(dshape, name="data", dtype=dtype) outs = fcompute(data, score_threshold, id_index, score_index) @@ -146,7 +146,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _nms_implement) out = fcompute(data, valid_count, indices, max_output_size, iou_threshold, force_suppress, top_k, coord_start=coord_start, score_index=score_index, id_index=id_index, @@ -252,7 +252,7 @@ def check_device(device): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _multibox_prior_implement) - with tvm.target.create(device): + with tvm.target.Target(device): out = fcompute(data, sizes, ratios, steps, offsets, clip) s = fschedule(out) @@ -299,7 +299,7 @@ def check_device(device): print("Running on target: %s" % device) fcompute, fschedule = tvm.topi.testing.dispatch(device, _multibox_detection_implement) - with tvm.target.create(device): + with tvm.target.Target(device): out = fcompute(cls_prob, loc_preds, anchors) s = fschedule(out) @@ -342,7 +342,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _roi_align_implement) b = fcompute(a, rois, pooled_size=pooled_size, spatial_scale=spatial_scale, @@ -394,7 +394,7 @@ def check_device(device): return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): b = topi.vision.rcnn.roi_pool_nchw(a, rois, pooled_size=pooled_size, spatial_scale=spatial_scale) s_func = tvm.topi.testing.dispatch(device, _roi_pool_schedule) @@ -428,7 +428,7 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - with tvm.target.create(device): + with tvm.target.Target(device): fcompute, fschedule = tvm.topi.testing.dispatch(device, _proposal_implement) out = fcompute(cls_prob, bbox_pred, im_info, **attrs) s = fschedule(out) diff --git a/tests/python/unittest/test_auto_scheduler_cost_model.py b/tests/python/unittest/test_auto_scheduler_cost_model.py index 6f7792b3083f..5d58ae041e21 100644 --- a/tests/python/unittest/test_auto_scheduler_cost_model.py +++ b/tests/python/unittest/test_auto_scheduler_cost_model.py @@ -30,9 +30,10 @@ def get_sample_records(number): """Generate random a list of random MeasureInput and MeasureResult pairs""" N = 128 - workload_key = auto_scheduler.make_workload_key(matmul_auto_scheduler_test, (N, N, N)) + workload_key = 
auto_scheduler.make_workload_key( + matmul_auto_scheduler_test, (N, N, N)) dag = auto_scheduler.ComputeDAG(workload_key) - target = tvm.target.create('llvm') + target = tvm.target.Target('llvm') task = auto_scheduler.SearchTask(dag, workload_key, target) policy = auto_scheduler.SketchPolicy(task, verbose=0) states = policy.sample_initial_population(number) @@ -64,7 +65,8 @@ def test_xgb_model(): costs = [np.mean([x.value for x in res.costs]) for res in results] throughputs = np.min(costs) / costs - rmse = np.sqrt(np.mean([np.square(pred - label) for pred, label in zip(preds, throughputs)])) + rmse = np.sqrt(np.mean([np.square(pred - label) + for pred, label in zip(preds, throughputs)])) assert rmse <= 0.3 with tempfile.NamedTemporaryFile() as fp: diff --git a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py index f06f06ac73c0..ff43432b3b78 100644 --- a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py +++ b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py @@ -24,6 +24,7 @@ class MockCostModel(PythonBasedModel): """A mock cost model that rates 1 only for the states with tile_k=2.""" + def predict(self, task, states): scores = [] found = False @@ -35,6 +36,7 @@ def predict(self, task, states): scores.append(1 if found else 0) return scores + def test_evo_search(): """Test evolutionary search. Since we cannot mock random number generator, we mocked the cost model to manually guide the evo search. If evo search works @@ -42,10 +44,13 @@ def test_evo_search(): This unit test has been tested with 1,000 runs with no failures, meaning that the failure rate is less than 0.1%. """ - workload_key = auto_scheduler.make_workload_key(matmul_auto_scheduler_test, (10, 10, 4)) + workload_key = auto_scheduler.make_workload_key( + matmul_auto_scheduler_test, (10, 10, 4)) dag = auto_scheduler.ComputeDAG(workload_key) - task = auto_scheduler.SearchTask(dag, workload_key, tvm.target.create('llvm')) - policy = auto_scheduler.SketchPolicy(task, schedule_cost_model=MockCostModel(), verbose=0) + task = auto_scheduler.SearchTask( + dag, workload_key, tvm.target.Target('llvm')) + policy = auto_scheduler.SketchPolicy( + task, schedule_cost_model=MockCostModel(), verbose=0) states = policy.sample_initial_population(50) pruned_states = [] for state in states: diff --git a/tests/python/unittest/test_auto_scheduler_feature.py b/tests/python/unittest/test_auto_scheduler_feature.py index 05f1cbb8641c..6d6f5cd400d6 100644 --- a/tests/python/unittest/test_auto_scheduler_feature.py +++ b/tests/python/unittest/test_auto_scheduler_feature.py @@ -44,10 +44,11 @@ def test_cpu_matmul(): s.parallel(C, jo) s.unroll(C, k) - target = tvm.target.create('llvm') + target = tvm.target.Target('llvm') task = auto_scheduler.SearchTask(dag, "test", target) names = auto_scheduler.feature.get_per_store_feature_names() - fea = auto_scheduler.feature.get_per_store_features_from_states([s], task)[0] + fea = auto_scheduler.feature.get_per_store_features_from_states([s], task)[ + 0] stage_0 = fea[0] assert len(stage_0) == len(names), "%d vs %d" % (len(stage_0), len(names)) @@ -78,9 +79,11 @@ def test_cpu_matmul(): # check touched memory in bytes, touched unique memory in bytes, reuse distance, etc. 
assert fequal(fea_dict[c_name + ".bytes"], math.log2(512 ** 3 * 4 + 1)) - assert fequal(fea_dict[b_name + ".unique_bytes"], math.log2(512 ** 2 * 4 + 1)) + assert fequal(fea_dict[b_name + ".unique_bytes"], + math.log2(512 ** 2 * 4 + 1)) assert fequal(fea_dict[c_name + ".reuse_dis_iter"], math.log2(8 * 16 + 1)) - assert fequal(fea_dict[c_name + ".reuse_dis_bytes"], math.log2((8 * 16 + 8 + 16) * 4 + 1)) + assert fequal(fea_dict[c_name + ".reuse_dis_bytes"], + math.log2((8 * 16 + 8 + 16) * 4 + 1)) assert fequal(fea_dict[c_name + ".reuse_ct"], math.log2(512 + 1)) # check annotations @@ -88,7 +91,8 @@ def test_cpu_matmul(): # assert fequal(fea_dict["unroll_type.kPosInnerReduce"], 1.0) assert fequal(fea_dict["vec_num"], math.log2(1 + 1)) assert fequal(fea_dict["parallel_num"], math.log2(2 + 1)) - assert fequal(fea_dict["parallel_prod"], math.log2((512 * 512 / 16 / 8) + 1)) + assert fequal(fea_dict["parallel_prod"], + math.log2((512 * 512 / 16 / 8) + 1)) def test_cpu_fusion(): @@ -102,10 +106,11 @@ def fusion_test(N, M): s = dag.get_init_state() s.compute_at(1, 2, s.stages[2].iters[1]) - target = tvm.target.create('llvm') + target = tvm.target.Target('llvm') task = auto_scheduler.SearchTask(dag, "test", target) names = auto_scheduler.feature.get_per_store_feature_names() - fea = auto_scheduler.feature.get_per_store_features_from_states([s], task)[0] + fea = auto_scheduler.feature.get_per_store_features_from_states([s], task)[ + 0] """ lowered IR: @@ -124,8 +129,10 @@ def fusion_test(N, M): for stage_fea in fea: for i, (name, value) in enumerate(zip(names, stage_fea)): if 'reuse_type.kSerialMultipleReadWrite' in name and value > 0.5: - assert fequal(stage_fea[i + 2], 1.0) # reuse distance in #iter - assert fequal(stage_fea[i + 3], math.log2(16 + 1)) # reuse distance in bytes + # reuse distance in #iter + assert fequal(stage_fea[i + 2], 1.0) + # reuse distance in bytes + assert fequal(stage_fea[i + 3], math.log2(16 + 1)) found = True assert found @@ -144,10 +151,12 @@ def test_gpu_feature(): inp = inputs[0] dag = auto_scheduler.ComputeDAG(inp.task.workload_key) - task = auto_scheduler.SearchTask(dag, inp.task.workload_key, inp.task.target, None, auto_scheduler.HardwareParams(100000, 16, 64)) + task = auto_scheduler.SearchTask( + dag, inp.task.workload_key, inp.task.target, None, auto_scheduler.HardwareParams(100000, 16, 64)) state = dag.infer_bound_from_state(inputs[0].state) - fea = auto_scheduler.feature.get_per_store_features_from_states([state], task)[0] + fea = auto_scheduler.feature.get_per_store_features_from_states([state], task)[ + 0] names = auto_scheduler.feature.get_per_store_feature_names() # build feature dict diff --git a/tests/python/unittest/test_auto_scheduler_measure.py b/tests/python/unittest/test_auto_scheduler_measure.py index 93a334c7d06b..c12240d3f1b8 100644 --- a/tests/python/unittest/test_auto_scheduler_measure.py +++ b/tests/python/unittest/test_auto_scheduler_measure.py @@ -25,8 +25,9 @@ from test_auto_scheduler_common import matmul_auto_scheduler_test, get_tiled_matmul + def record_common(dag, s): - target = tvm.target.create("llvm") + target = tvm.target.Target("llvm") task = auto_scheduler.SearchTask(dag, "test", target) inp = auto_scheduler.measure.MeasureInput(task, s) @@ -53,7 +54,8 @@ def test_record_split_reorder_fuse_annotation(): A = te.placeholder((512, 512), name='A') B = te.placeholder((512, 512), name='B') k = te.reduce_axis((0, 512), name='k') - C = te.compute((512, 512), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name='C') + C = te.compute((512, 
512), lambda i, j: te.sum( + A[i][k] * B[k][j], axis=[k]), name='C') dag = auto_scheduler.ComputeDAG([A, B, C]) s = dag.get_init_state() @@ -88,7 +90,8 @@ def test_record_compute_at_root_inline_cache_read_write(): AA = topi.nn.relu(A) B = te.placeholder((512, 512), name='B') k = te.reduce_axis((0, 512), name='k') - C = te.compute((512, 512), lambda i, j: te.sum(AA[i][k] * B[k][j], axis=[k]), name='C') + C = te.compute((512, 512), lambda i, j: te.sum( + AA[i][k] * B[k][j], axis=[k]), name='C') dag = auto_scheduler.ComputeDAG([A, B, C]) s = dag.get_init_state() @@ -115,7 +118,8 @@ def test_record_follow_split_follow_fused_split(): A = te.placeholder((512, 512), name='A') B = te.placeholder((512, 512), name='B') k = te.reduce_axis((0, 512), name='k') - C = te.compute((512, 512), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name='C') + C = te.compute((512, 512), lambda i, j: te.sum( + A[i][k] * B[k][j], axis=[k]), name='C') D = topi.nn.relu(C) E = topi.nn.relu(D) @@ -149,7 +153,8 @@ def test_record_pragma_storage_align_rfactor(): A = te.placeholder((512, 512), name='A') B = te.placeholder((512, 512), name='B') k = te.reduce_axis((0, 512), name='k') - C = te.compute((512, 512), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name='C') + C = te.compute((512, 512), lambda i, j: te.sum( + A[i][k] * B[k][j], axis=[k]), name='C') dag = auto_scheduler.ComputeDAG([A, B, C]) s = dag.get_init_state() @@ -170,7 +175,7 @@ def test_measure_local_builder_runner(enable_cpu_cache_flush=False): return dag, s0 = get_tiled_matmul() - tgt = tvm.target.create("llvm") + tgt = tvm.target.Target("llvm") task = auto_scheduler.SearchTask(dag, "test", tgt) minp = auto_scheduler.MeasureInput(task, s0) @@ -189,7 +194,7 @@ def test_measure_local_builder_rpc_runner(enable_cpu_cache_flush=False): return dag, s0 = get_tiled_matmul() - tgt = tvm.target.create("llvm") + tgt = tvm.target.Target("llvm") task = auto_scheduler.SearchTask(dag, "test", tgt) minp = auto_scheduler.MeasureInput(task, s0) diff --git a/tests/python/unittest/test_auto_scheduler_search_policy.py b/tests/python/unittest/test_auto_scheduler_search_policy.py index bf7cefabc614..b10d520eeadf 100644 --- a/tests/python/unittest/test_auto_scheduler_search_policy.py +++ b/tests/python/unittest/test_auto_scheduler_search_policy.py @@ -26,6 +26,7 @@ from test_auto_scheduler_common import matmul_auto_scheduler_test, PropagatingThread + def search_common(workload=matmul_auto_scheduler_test, target="llvm", search_policy='empty', seed=random.randint(1, 1 << 30), runner='local', cost_model=auto_scheduler.RandomModel(), num_measure_trials=2, @@ -36,24 +37,26 @@ def search_common(workload=matmul_auto_scheduler_test, target="llvm", N = 128 workload_key = auto_scheduler.make_workload_key(workload, (N, N, N)) dag = auto_scheduler.ComputeDAG(workload_key) - target = tvm.target.create(target) + target = tvm.target.Target(target) task = auto_scheduler.SearchTask(dag, workload_key, target) with tempfile.NamedTemporaryFile() as fp: log_file = fp.name init_search_callbacks = init_search_callbacks or [] - init_search_callbacks.append(auto_scheduler.PreloadMeasuredStates(log_file)) + init_search_callbacks.append( + auto_scheduler.PreloadMeasuredStates(log_file)) if search_policy == 'empty': search_policy = auto_scheduler.EmptyPolicy(task) elif search_policy == 'sketch': search_policy = auto_scheduler.SketchPolicy(task, schedule_cost_model=cost_model, - init_search_callbacks=init_search_callbacks) + init_search_callbacks=init_search_callbacks) tuning_options = 
auto_scheduler.TuningOptions(num_measure_trials=num_measure_trials, - runner=runner, verbose=1, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]) - sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options) + runner=runner, verbose=1, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]) + sch, args = auto_scheduler.auto_schedule( + task, search_policy, tuning_options) print("*"*80) print(target) print("*"*80) diff --git a/tests/python/unittest/test_auto_scheduler_sketch_generation.py b/tests/python/unittest/test_auto_scheduler_sketch_generation.py index c35a3f75b7e2..6d4c26399118 100644 --- a/tests/python/unittest/test_auto_scheduler_sketch_generation.py +++ b/tests/python/unittest/test_auto_scheduler_sketch_generation.py @@ -32,7 +32,7 @@ def generate_sketches(workload_func, args, target, print_for_debug=False): workload_key = auto_scheduler.make_workload_key(workload_func, args) dag = auto_scheduler.ComputeDAG(workload_key) - task = auto_scheduler.SearchTask(dag, workload_key, tvm.target.create(target)) + task = auto_scheduler.SearchTask(dag, workload_key, tvm.target.Target(target)) policy = auto_scheduler.SketchPolicy(task, verbose=0) return policy.generate_sketches(print_for_debug) diff --git a/tests/python/unittest/test_autotvm_common.py b/tests/python/unittest/test_autotvm_common.py index 909dbbcd4d14..8c22ccb5694f 100644 --- a/tests/python/unittest/test_autotvm_common.py +++ b/tests/python/unittest/test_autotvm_common.py @@ -37,13 +37,15 @@ def run(self, measure_inputs, build_results): def get_build_kwargs(self): return {} + @autotvm.template("testing/matmul") def matmul(N, L, M, dtype): A = te.placeholder((N, L), name='A', dtype=dtype) B = te.placeholder((L, M), name='B', dtype=dtype) k = te.reduce_axis((0, L), name='k') - C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + C = te.compute((N, M), lambda i, j: te.sum( + A[i, k] * B[k, j], axis=k), name='C') s = te.create_schedule(C.op) # schedule @@ -64,6 +66,7 @@ def matmul(N, L, M, dtype): return s, [A, B, C] + @autotvm.template("testing/bad_matmul") def bad_matmul(N, L, M, dtype): if 'bad_device' in tvm.target.Target.current().keys: @@ -71,7 +74,8 @@ def bad_matmul(N, L, M, dtype): B = te.placeholder((L, M), name='B', dtype=dtype) k = te.reduce_axis((0, L-1), name='k') - C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name='C') + C = te.compute((N, M), lambda i, j: te.sum( + A[i, k] * B[k, j], axis=k), name='C') s = te.create_schedule(C.op) # schedule @@ -83,12 +87,15 @@ def bad_matmul(N, L, M, dtype): return matmul(N, L, M, dtype) + def get_sample_task(n=128): """return a sample task for testing""" - target = tvm.target.create("llvm") - task = autotvm.task.create("testing/matmul", args=(n, n, n, 'float32'), target=target) + target = tvm.target.Target("llvm") + task = autotvm.task.create( + "testing/matmul", args=(n, n, n, 'float32'), target=target) return task, target + def get_sample_records(n): """get sample records for testing""" tsk, target = get_sample_task() diff --git a/tests/python/unittest/test_autotvm_measure.py b/tests/python/unittest/test_autotvm_measure.py index f96d333ddbc3..11c95eb80990 100644 --- a/tests/python/unittest/test_autotvm_measure.py +++ b/tests/python/unittest/test_autotvm_measure.py @@ -46,6 +46,7 @@ def test_task_tuner_without_measurement(): tuner.tune(n_trial=10, measure_option=measure_option) assert tuner.best_flops > 1 + def test_check_correctness(): task, target = get_sample_task() @@ -64,8 +65,9 @@ def 
_callback_correct(tuner, measure_inputs, measure_results): # a bad template n = 128 - target = tvm.target.create("llvm -device=bad_device") - task = autotvm.task.create("testing/bad_matmul", args=(n, n, n, 'float32'), target=target) + target = tvm.target.Target("llvm -device=bad_device") + task = autotvm.task.create( + "testing/bad_matmul", args=(n, n, n, 'float32'), target=target) def _callback_wrong(tuner, measure_inputs, measure_results): for _, res in zip(measure_inputs, measure_results): diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py index 58174dd442a9..4bf8651f652e 100644 --- a/tests/python/unittest/test_runtime_heterogeneous.py +++ b/tests/python/unittest/test_runtime_heterogeneous.py @@ -24,6 +24,7 @@ from tvm.contrib import graph_runtime, util from tvm import topi + def get_simplex_graph(host_dev_type, device_dev_type): r""" Return the hand-crafted json object where only one copy node is inserted. This node copies data from the target device to cpu. @@ -136,7 +137,7 @@ def check_device(device, target_device): tensor_a = te.placeholder(shape, name="A") tensor_b = te.placeholder(shape, name="B") elemwise_add = te.compute(shape, lambda *i: tensor_a(*i) - + tensor_b(*i), name="elemwise_add") + + tensor_b(*i), name="elemwise_add") target = topi.cpp.TEST_create_target(device) schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add]) lower_add = tvm.lower(schedule_add, [tensor_a, tensor_b, elemwise_add], @@ -150,7 +151,7 @@ def check_device(device, target_device): # Create module for sub whose target is the host. tensor_c = te.placeholder(shape, name="C") elemwise_sub = te.compute(shape, lambda *i: tensor_copy(*i) - - tensor_c(*i), name="elemwise_sub") + - tensor_c(*i), name="elemwise_sub") schedule_sub = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c, elemwise_sub], @@ -175,7 +176,7 @@ def check_device(device, target_device): dev_tar = {"cuda": "cuda", "opencl": "opencl"} for device, target in dev_tar.items(): - with tvm.target.create(device): + with tvm.target.Target(device): check_device(device, target) @@ -331,9 +332,9 @@ def check_device(device, target_device): tensor_b = te.placeholder(shape, name="B") tensor_d = te.placeholder(shape, name="D") elemwise_add0 = te.compute(shape, lambda *i: tensor_a(*i) - + tensor_b(*i), name="elemwise_add0") + + tensor_b(*i), name="elemwise_add0") elemwise_add1 = te.compute(shape, lambda *i: copy_sub_add(*i) - + tensor_d(*i), name="elemwise_add1") + + tensor_d(*i), name="elemwise_add1") target = topi.cpp.TEST_create_target(device) add_schedule0 = topi.cpp.cuda.schedule_injective( target, [elemwise_add0]) @@ -348,7 +349,7 @@ def check_device(device, target_device): # Create module for sub whose target is the host. 
tensor_c = te.placeholder(shape, name="C") elemwise_sub = te.compute(shape, lambda *i: copy_add_sub(*i) - - tensor_c(*i), name="elemwise_sub") + - tensor_c(*i), name="elemwise_sub") sub_schedule = te.create_schedule(elemwise_sub.op) lower_sub = tvm.lower(sub_schedule, [copy_add_sub, tensor_c, elemwise_sub], @@ -397,9 +398,10 @@ def check_load_module(): dev_tar = {"cuda": "cuda", "opencl": "opencl"} for device, target in dev_tar.items(): - with tvm.target.create(device): + with tvm.target.Target(device): check_device(device, target) + if __name__ == "__main__": test_simplex_data_transferring() test_duplex_data_transferring() diff --git a/tests/python/unittest/test_target_codegen_cuda.py b/tests/python/unittest/test_target_codegen_cuda.py index 567f5eace186..c046874f7d33 100644 --- a/tests/python/unittest/test_target_codegen_cuda.py +++ b/tests/python/unittest/test_target_codegen_cuda.py @@ -27,10 +27,12 @@ tx = te.thread_axis("threadIdx.x") bx = te.thread_axis("blockIdx.x") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_vectorize_add(): num_thread = 8 + def check_cuda(dtype, n, lanes): if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version): print("Skip because gpu does not have fp16 support") @@ -39,7 +41,8 @@ def check_cuda(dtype, n, lanes): print("skip because gpu does not support int8") return A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) - B = te.compute((n,), lambda i: A[i] + tvm.tir.const(1, A.dtype), name='B') + B = te.compute((n,), lambda i: A[i] + + tvm.tir.const(1, A.dtype), name='B') s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=num_thread) s[B].bind(xo, bx) @@ -66,10 +69,12 @@ def check_cuda(dtype, n, lanes): check_cuda("float16", 64, 6) check_cuda("float16", 64, 8) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_multiply_add(): num_thread = 8 + def check_cuda(dtype, n, lanes): if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version): print("skip because gpu does not support int8") @@ -78,14 +83,14 @@ def check_cuda(dtype, n, lanes): B = te.placeholder((n,), name='B', dtype="%sx%d" % (dtype, lanes)) C = te.placeholder((n,), name='C', dtype="int32") D = te.compute((n,), - lambda i: tvm.tir.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') + lambda i: tvm.tir.call_pure_extern("int32", "__dp4a", A[i], B[i], C[i]), name='D') s = te.create_schedule(D.op) xo, xi = s[D].split(D.op.axis[0], factor=num_thread) s[D].bind(xo, bx) s[D].bind(xi, tx) fun = tvm.build(s, [A, B, C, D], "cuda") - np_a = np.random.randint(low=-128, high=127, size=(n,lanes)) - np_b = np.random.randint(low=-128, high=127, size=(n,lanes)) + np_a = np.random.randint(low=-128, high=127, size=(n, lanes)) + np_b = np.random.randint(low=-128, high=127, size=(n, lanes)) np_c = np.random.randint(low=0, high=127, size=(n,)) np_d = [sum(x * y) + z for x, y, z in zip(np_a, np_b, np_c)] ctx = tvm.gpu(0) @@ -97,10 +102,12 @@ def check_cuda(dtype, n, lanes): tvm.testing.assert_allclose(d.asnumpy(), np_d) check_cuda("int8", 64, 4) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_vectorize_load(): num_thread = 8 + def check_cuda(dtype, n, lanes): ctx = tvm.gpu(0) A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes)) @@ -110,10 +117,10 @@ def check_cuda(dtype, n, lanes): s[B].bind(block, bx) s[B].bind(thread, tx) fun = tvm.build(s, [A, B], "cuda", name="vector_load") - np_a = np.random.randint(low=-128, high=127, size=(n,lanes)) + np_a = np.random.randint(low=-128, high=127, 
size=(n, lanes)) a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(np_a) b = tvm.nd.empty((n,), B.dtype, ctx) - fun(a,b) + fun(a, b) tvm.testing.assert_allclose(a.asnumpy(), b.asnumpy()) check_cuda("int8", 64, 2) check_cuda("int8", 64, 3) @@ -121,13 +128,15 @@ def check_cuda(dtype, n, lanes): check_cuda("int8", 64, 8) check_cuda("int8", 64, 16) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_make_int8(): def check_cuda(n, value, lanes): dtype = 'int8' ctx = tvm.gpu(0) - A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype)) + A = te.compute((n, lanes), lambda i, + j: tvm.tir.const(value, dtype=dtype)) s = te.create_schedule(A.op) y, x = s[A].op.axis s[A].vectorize(x) @@ -152,6 +161,7 @@ def check_cuda(n, value, lanes): @tvm.testing.requires_cuda def test_cuda_inf_nan(): target = 'cuda' + def check_inf_nan(ctx, n, value, dtype): A = te.placeholder((n,), name='A', dtype=dtype) inf_value = tvm.tir.const(value, dtype=dtype) @@ -180,7 +190,8 @@ def test_cuda_shuffle(): idxm = tvm.tir.indexmod a = te.placeholder((64, ), 'int32') b = te.placeholder((64, ), 'int32') - c = te.compute((64, ), lambda x: a[x] + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) + c = te.compute((64, ), lambda x: a[x] + + b[x - idxm(x, 4) + (3 - idxm(x, 4))]) sch = te.create_schedule(c.op) x = c.op.axis[0] xo, xi = sch[c].split(x, 4) @@ -192,14 +203,17 @@ def MyVectorize(): def vectorizer(op): if op.for_type == tvm.tir.For.Vectorized: four = tvm.tir.const(4, 'int32') - idx = tvm.tir.Ramp(thrx.var * four, tvm.tir.const(1, 'int32'), 4) + idx = tvm.tir.Ramp( + thrx.var * four, tvm.tir.const(1, 'int32'), 4) all_ones = tvm.tir.const(1, 'int32x4') store = op.body value = store.value - new_a = tvm.tir.Load('int32x4', value.a.buffer_var, idx, all_ones) + new_a = tvm.tir.Load( + 'int32x4', value.a.buffer_var, idx, all_ones) bs, ids = [], [] for i in range(4): - bs.append(tvm.tir.Load('int32', value.b.buffer_var, thrx.var * four + tvm.tir.const(i, 'int32'))) + bs.append(tvm.tir.Load('int32', value.b.buffer_var, + thrx.var * four + tvm.tir.const(i, 'int32'))) ids.append(tvm.tir.const(3 - i, 'int32')) new_b = tvm.tir.Shuffle(bs, ids) return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones) @@ -215,11 +229,12 @@ def _transform(f, *_): a_ = np.array(list(range(64)), dtype='int32') b_ = np.array((list(range(4))[::-1]) * 16, dtype='int32') c_ = np.zeros((64, ), dtype='int32') - ref = a_ + np.array((list(range(4))) * 16, dtype='int32') + ref = a_ + np.array((list(range(4))) * 16, dtype='int32') nda, ndb, ndc = [tvm.nd.array(i, tvm.gpu(0)) for i in [a_, b_, c_]] module(nda, ndb, ndc) tvm.testing.assert_allclose(ndc.asnumpy(), ref) + @tvm.testing.parametrize_targets("cuda", "rocm") def test_crossthread_reduction1(target, ctx): n = te.var("n") @@ -246,8 +261,8 @@ def verify(nthd): a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx) b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx) func(a, b) - tvm.testing.assert_allclose(b.asnumpy(), \ - np.sum(a.asnumpy(), axis=1), rtol=1e-3) + tvm.testing.assert_allclose(b.asnumpy(), + np.sum(a.asnumpy(), axis=1), rtol=1e-3) verify(16) verify(32) @@ -262,7 +277,8 @@ def test_crossthread_reduction2(target, ctx): A = te.placeholder((n, k0, k1), name='A') k0 = te.reduce_axis((0, k0), "k0") k1 = te.reduce_axis((0, k1), "k1") - B = te.compute((n,), lambda i: te.sum(A[i, k0, k1], axis=(k0, k1)), name="B") + B = te.compute((n,), lambda i: te.sum( + A[i, k0, k1], axis=(k0, k1)), name="B") def sched(nthdx, nthdy): s = te.create_schedule(B.op) @@ -285,22 +301,23 
@@ def verify(nthdx, nthdy): a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx) b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx) func(a, b) - tvm.testing.assert_allclose(b.asnumpy(), \ - np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3) + tvm.testing.assert_allclose(b.asnumpy(), + np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3) verify(16, 16) verify(32, 32) verify(16, 32) verify(32, 16) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_reduction_binding(): k = te.reduce_axis((0, 32), 'k') A = te.placeholder((96, 32), name='A') - B = te.compute( (96,), lambda m: - te.sum(A[m, k], axis=k), - name='B') + B = te.compute((96,), lambda m: + te.sum(A[m, k], axis=k), + name='B') s = te.create_schedule(B.op) s[B].reorder(B.op.reduce_axis[0], B.op.axis[0]) @@ -310,15 +327,16 @@ def test_cuda_reduction_binding(): fcuda = tvm.build(s, [A, B], "cuda") + @tvm.testing.parametrize_targets("cuda", "rocm") def test_rfactor_predicates(target, ctx): n = te.reduce_axis((0, 129), 'n') A = te.placeholder((129,), name='A') - B = te.compute( (1, ), lambda b: - te.sum(A[n], - axis=n), - name='B' - ) + B = te.compute((1, ), lambda b: + te.sum(A[n], + axis=n), + name='B' + ) s = te.create_schedule(B.op) @@ -341,6 +359,7 @@ def test_rfactor_predicates(target, ctx): fcuda = tvm.build(s, [A, B], target) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_const_float_to_half(): @@ -367,6 +386,7 @@ def test_cuda_const_float_to_half(): func(a, c) np.testing.assert_equal(c.asnumpy(), a_np > b.value) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_reduction(): @@ -381,7 +401,7 @@ def check(device, dtype, m=32, n=32): d = a * b e = topi.elemwise_sum([c, d]) g = topi.sum(e) - with tvm.target.create(device): + with tvm.target.Target(device): sg = topi.cuda.schedule_reduce(g) func = tvm.build(sg, [a, b, g], device) a_np = np.random.uniform(size=(m, n)).astype(a.dtype) @@ -397,6 +417,7 @@ def check(device, dtype, m=32, n=32): check("rocm", "float32") check("cuda", "float16") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_mix_threaded_and_normal_reduction(): @@ -411,7 +432,7 @@ def check(device, dtype, m=32, n=32): a = tvm.te.placeholder((m, n), name="a", dtype=dtype) b = topi.sum(a) - with tvm.target.create(device): + with tvm.target.Target(device): sb = tvm.te.create_schedule(b.op) i, _ = b.op.reduce_axis sb[b].bind(i, tvm.te.thread_axis("threadIdx.x")) @@ -427,6 +448,7 @@ def check(device, dtype, m=32, n=32): check("rocm", "float32") check("cuda", "float16") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_floordiv_with_vectorization(): @@ -452,6 +474,7 @@ def test_cuda_floordiv_with_vectorization(): func(a_nd, b_nd) tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_floormod_with_vectorization(): @@ -477,11 +500,12 @@ def test_cuda_floormod_with_vectorization(): func(a_nd, b_nd) tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_casts(): def check(t0, t1): - if (t0 == "float16" or t1 == "float16") and not have_fp16(tvm.gpu(0).compute_version): + if (t0 == "float16" or t1 == "float16") and not have_fp16(tvm.gpu(0).compute_version): print("Skip because gpu does not have fp16 support") return @@ -489,7 +513,8 @@ def check(t0, t1): n = 128 A = te.placeholder((n,), dtype=t0, name='A') B = te.placeholder((n,), dtype=t1, name='B') - C = te.compute((n,), lambda i: 
A[i] + topi.cast(B[i], A.dtype), name='C') + C = te.compute((n,), lambda i: A[i] + + topi.cast(B[i], A.dtype), name='C') # schedule s = tvm.te.create_schedule(C.op) @@ -501,7 +526,8 @@ def check(t0, t1): # correctness ctx = tvm.gpu(0) - low, high = (0, 20) if t0.startswith('u') or t1.startswith('u') else (-10, 10) + low, high = (0, 20) if t0.startswith( + 'u') or t1.startswith('u') else (-10, 10) a_np = np.random.randint(low, high, size=n).astype(A.dtype) b_np = np.random.randint(low, high, size=n).astype(B.dtype) c_np = (a_np + b_np).astype(A.dtype) @@ -520,10 +546,12 @@ def skip(t0, t1): return True return False - types = ["float16", "float32", "int8", "uint8", "int16", "uint16", "int32", "uint32"] + types = ["float16", "float32", "int8", "uint8", + "int16", "uint16", "int32", "uint32"] for t0, t1 in [(x, y) for x in types for y in types if not skip(x, y)]: check(t0, t1) + def sched(B): s = te.create_schedule(B.op) io, ii = s[B].split(s[B].op.axis[0], nparts=1) @@ -534,30 +562,32 @@ def sched(B): s[B].bind(iio, tx) return s + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_intrin1(): test_funcs = [ - (tvm.tir.floor, lambda x : np.floor(x)), - (tvm.tir.ceil, lambda x : np.ceil(x)), - (tvm.tir.trunc, lambda x : np.trunc(x)), - (tvm.tir.abs, lambda x : np.fabs(x)), - (tvm.tir.round, lambda x : np.round(x)), - (tvm.tir.exp, lambda x : np.exp(x)), - (tvm.tir.exp2, lambda x : np.exp2(x)), - (tvm.tir.exp10, lambda x : np.power(10,x)), - (tvm.tir.log, lambda x : np.log(x)), - (tvm.tir.log2, lambda x : np.log2(x)), - (tvm.tir.log10, lambda x : np.log10(x)), - (tvm.tir.tan, lambda x : np.tan(x)), - (tvm.tir.cos, lambda x : np.cos(x)), - (tvm.tir.cosh, lambda x : np.cosh(x)), - (tvm.tir.sin, lambda x : np.sin(x)), - (tvm.tir.sinh, lambda x : np.sinh(x)), - (tvm.tir.atan, lambda x : np.arctan(x)), - (tvm.tir.tanh, lambda x : np.tanh(x)), - (tvm.tir.sqrt, lambda x : np.sqrt(x)), + (tvm.tir.floor, lambda x: np.floor(x)), + (tvm.tir.ceil, lambda x: np.ceil(x)), + (tvm.tir.trunc, lambda x: np.trunc(x)), + (tvm.tir.abs, lambda x: np.fabs(x)), + (tvm.tir.round, lambda x: np.round(x)), + (tvm.tir.exp, lambda x: np.exp(x)), + (tvm.tir.exp2, lambda x: np.exp2(x)), + (tvm.tir.exp10, lambda x: np.power(10, x)), + (tvm.tir.log, lambda x: np.log(x)), + (tvm.tir.log2, lambda x: np.log2(x)), + (tvm.tir.log10, lambda x: np.log10(x)), + (tvm.tir.tan, lambda x: np.tan(x)), + (tvm.tir.cos, lambda x: np.cos(x)), + (tvm.tir.cosh, lambda x: np.cosh(x)), + (tvm.tir.sin, lambda x: np.sin(x)), + (tvm.tir.sinh, lambda x: np.sinh(x)), + (tvm.tir.atan, lambda x: np.arctan(x)), + (tvm.tir.tanh, lambda x: np.tanh(x)), + (tvm.tir.sqrt, lambda x: np.sqrt(x)), ] + def run_test(tvm_intrin, np_func, dtype): if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version): print("Skip because gpu does not have fp16 support") @@ -571,7 +601,8 @@ def run_test(tvm_intrin, np_func, dtype): tvm.tir.cosh, tvm.tir.sinh} if dtype == "float16" and tvm_intrin in skip_set: - print("Skip because '{0}' does not support fp16 yet".format(tvm_intrin.__name__)) + print("Skip because '{0}' does not support fp16 yet".format( + tvm_intrin.__name__)) return n = 128 @@ -583,20 +614,23 @@ def run_test(tvm_intrin, np_func, dtype): a = tvm.nd.array(np.random.uniform(0, 1, size=n).astype(A.dtype), ctx) b = tvm.nd.array(np.zeros(shape=(n,)).astype(A.dtype), ctx) f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), np_func(a.asnumpy()), atol=1e-3, rtol=1e-3) + tvm.testing.assert_allclose(b.asnumpy(), np_func( + a.asnumpy()), atol=1e-3, 
rtol=1e-3) for func in test_funcs: run_test(*func, "float32") run_test(*func, "float16") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_intrin2(dtype="float32"): c2 = tvm.tir.const(2, dtype=dtype) test_funcs = [ - (tvm.tir.power, lambda x : np.power(x, 2.0)), - (tvm.tir.fmod, lambda x : np.fmod(x, 2.0)) + (tvm.tir.power, lambda x: np.power(x, 2.0)), + (tvm.tir.fmod, lambda x: np.fmod(x, 2.0)) ] + def run_test(tvm_intrin, np_func): n = 128 A = te.placeholder((n,), dtype=dtype, name='A') @@ -607,11 +641,13 @@ def run_test(tvm_intrin, np_func): a = tvm.nd.array(np.random.uniform(0, 1, size=n).astype(A.dtype), ctx) b = tvm.nd.array(np.zeros(shape=(n,)).astype(A.dtype), ctx) f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), np_func(a.asnumpy()), atol=1e-3, rtol=1e-3) + tvm.testing.assert_allclose(b.asnumpy(), np_func( + a.asnumpy()), atol=1e-3, rtol=1e-3) for func in test_funcs: run_test(*func) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_popcount(): @@ -629,7 +665,8 @@ def run_test(dtype): s = sched(B) f = tvm.build(s, [A, B], "cuda") ctx = tvm.gpu(0) - a = tvm.nd.array(np.random.randint(0, 100000, size=n).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.randint( + 0, 100000, size=n).astype(A.dtype), ctx) b = tvm.nd.array(np.zeros(shape=(n,)).astype(B.dtype), ctx) f(a, b) ref = np.vectorize(ref_popcount)(a.asnumpy()) @@ -638,6 +675,7 @@ def run_test(dtype): run_test("uint32") run_test("uint64") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_cuda_vectorize_load_permute_pad(): @@ -677,6 +715,7 @@ def check_cuda(dtype, n, l, padding, lanes): check_cuda("float16", 64, 16, 3, 4) check_cuda("float32", 64, 16, 3, 4) + def vcf_check_common(s, args): N = 512 @@ -694,7 +733,8 @@ def pre_visit(stmt): if isinstance(stmt, tvm.tir.Broadcast): inside_broadcast[0] = True # Check Broadcast[Imm numbers] or Broadcast[Load] patterns - assert isinstance(stmt.value, (tvm.tir.IntImm, tvm.tir.FloatImm, tvm.tir.Load)) + assert isinstance(stmt.value, (tvm.tir.IntImm, + tvm.tir.FloatImm, tvm.tir.Load)) if isinstance(stmt, tvm.tir.Store): # Check Store[Ramp] pattern assert isinstance(stmt.index, tvm.tir.Ramp) @@ -725,6 +765,7 @@ def post_visit(stmt): tvm.testing.assert_allclose(c.asnumpy(), np.dot( a.asnumpy(), b.asnumpy()), rtol=1e-5) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_cooperative_fetching_x(): @@ -776,6 +817,7 @@ def test_vectorized_cooperative_fetching_x(): vcf_check_common(s, [A, B, C]) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_vectorized_cooperative_fetching_xy(): @@ -831,20 +873,22 @@ def test_vectorized_cooperative_fetching_xy(): vcf_check_common(s, [A, B, C]) + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_unrolled_vectorization(): dtype = 'float32' target = 'cuda' - ## Compute declaration + # Compute declaration N = 128 A = te.placeholder((N, N), name='A') B = te.placeholder((N, N), name='B') k = te.reduce_axis((0, N), name='k') - C = te.compute((N, N), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name='C') + C = te.compute((N, N), lambda i, j: te.sum( + A[i][k] * B[k][j], axis=[k]), name='C') - ## Schedule + # Schedule s = te.create_schedule([C.op]) CC = s.cache_write(C, "local") i, j = s[C].op.axis @@ -859,7 +903,7 @@ def test_unrolled_vectorization(): s[CC].unroll(ki) s[CC].vectorize(j) - ## Check correctness + # Check correctness ctx = tvm.context(target) a_tvm = tvm.nd.array(np.ones((N, N)).astype(dtype), ctx=ctx) b_tvm = tvm.nd.array(np.ones((N, 
N)).astype(dtype), ctx=ctx) @@ -869,6 +913,7 @@ def test_unrolled_vectorization(): c_np = c_tvm.asnumpy() tvm.testing.assert_allclose(c_np, N * np.ones((N, N))) + if __name__ == "__main__": test_cuda_vectorize_add() test_cuda_multiply_add() @@ -893,4 +938,3 @@ def test_unrolled_vectorization(): test_vectorized_cooperative_fetching_x() test_vectorized_cooperative_fetching_xy() test_unrolled_vectorization() - diff --git a/tests/python/unittest/test_target_codegen_llvm.py b/tests/python/unittest/test_target_codegen_llvm.py index fd7a764a5baa..9aa843eb17b8 100644 --- a/tests/python/unittest/test_target_codegen_llvm.py +++ b/tests/python/unittest/test_target_codegen_llvm.py @@ -50,7 +50,8 @@ def test_llvm_void_intrin(): ib = tvm.tir.ir_builder.create() A = ib.pointer("uint8", name="A") # Create an intrinsic that returns void. - x = tvm.tir.call_llvm_intrin('', 'llvm.va_start', tvm.tir.const(1, 'uint32'), A) + x = tvm.tir.call_llvm_intrin( + '', 'llvm.va_start', tvm.tir.const(1, 'uint32'), A) ib.emit(x) body = ib.get() mod = tvm.IRModule.from_expr( @@ -67,20 +68,20 @@ def test_llvm_overloaded_intrin(): def use_llvm_intrinsic(A, C): ib = tvm.tir.ir_builder.create() - L = A.vload((0,0)) + L = A.vload((0, 0)) I = tvm.tir.call_llvm_pure_intrin('int32', 'llvm.ctlz', - tvm.tir.const(2, 'uint32'), L, tvm.tir.const(0, 'int1')) - S = C.vstore((0,0), I) + tvm.tir.const(2, 'uint32'), L, tvm.tir.const(0, 'int1')) + S = C.vstore((0, 0), I) ib.emit(S) return ib.get() - A = tvm.te.placeholder((1,1), dtype = 'int32', name = 'A') - C = tvm.te.extern((1,1), [A], - lambda ins, outs: use_llvm_intrinsic(ins[0], outs[0]), - name = 'C' , dtype = 'int32') + A = tvm.te.placeholder((1, 1), dtype='int32', name='A') + C = tvm.te.extern((1, 1), [A], + lambda ins, outs: use_llvm_intrinsic(ins[0], outs[0]), + name='C', dtype='int32') s = tvm.te.create_schedule(C.op) - f = tvm.build(s, [A, C], target = 'llvm') + f = tvm.build(s, [A, C], target='llvm') @tvm.testing.requires_llvm @@ -94,8 +95,9 @@ def test_llvm_import(): n = 10 A = te.placeholder((n,), name='A') B = te.compute((n,), lambda *i: - tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0), - name='B') + tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0), + name='B') + def check_llvm(use_file): if not clang.find_clang(required=False): print("skip because clang is not available") @@ -121,13 +123,13 @@ def check_llvm(use_file): check_llvm(use_file=False) - @tvm.testing.requires_llvm def test_llvm_lookup_intrin(): ib = tvm.tir.ir_builder.create() A = ib.pointer("uint8x8", name="A") z = tvm.tir.const(0, 'int32') - x = tvm.tir.call_llvm_pure_intrin("uint8x8", "llvm.ctpop.v8i8", tvm.tir.const(1, 'uint32'), A[z]) + x = tvm.tir.call_llvm_pure_intrin( + "uint8x8", "llvm.ctpop.v8i8", tvm.tir.const(1, 'uint32'), A[z]) ib.emit(x) body = ib.get() mod = tvm.IRModule.from_expr( @@ -137,9 +139,10 @@ def test_llvm_lookup_intrin(): @tvm.testing.requires_llvm def test_llvm_large_uintimm(): - value = (1 << 63) + 123 + value = (1 << 63) + 123 other = tvm.tir.const(3, "uint64") - A = te.compute((), lambda : tvm.tir.const(value, "uint64") + other, name='A') + A = te.compute((), lambda: tvm.tir.const( + value, "uint64") + other, name='A') s = te.create_schedule(A.op) def check_llvm(): @@ -179,7 +182,7 @@ def check_llvm(): elem_offset=te.size_var('Aoffset'), offset_factor=8, name='A') - binds = {A : Ab} + binds = {A: Ab} # BUILD and invoke the kernel. 
f = tvm.build(s, [A, B, C], "llvm", binds=binds) ctx = tvm.cpu(0) @@ -220,8 +223,8 @@ def check_llvm(): c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx) f(a, c) tvm.testing.assert_allclose(c.asnumpy(), - np.sqrt(a.asnumpy() + 1) * 2 + 2, - rtol=1e-5) + np.sqrt(a.asnumpy() + 1) * 2 + 2, + rtol=1e-5) check_llvm() @@ -231,7 +234,7 @@ def test_llvm_flip_pipeline(): def check_llvm(nn, base): n = tvm.runtime.convert(nn) A = te.placeholder((n + base), name='A') - C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C') + C = te.compute((n,), lambda i: A(nn + base - i - 1), name='C') s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) @@ -241,7 +244,8 @@ def check_llvm(nn, base): ctx = tvm.cpu(0) # launch the kernel. n = nn - a = tvm.nd.array(np.random.uniform(size=(n + base)).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.uniform( + size=(n + base)).astype(A.dtype), ctx) c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx) f(a, c) tvm.testing.assert_allclose( @@ -257,7 +261,8 @@ def test_llvm_vadd_pipeline(): def check_llvm(n, lanes): A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes) B = te.compute((n,), lambda i: A[i], name='B') - C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C') + C = te.compute((n,), lambda i: B[i] + + tvm.tir.const(1, A.dtype), name='C') s = te.create_schedule(C.op) xo, xi = s[C].split(C.op.axis[0], nparts=2) _, xi = s[C].split(xi, factor=2) @@ -295,7 +300,8 @@ def check_llvm(nn, base, stride): ctx = tvm.cpu(0) # launch the kernel. n = nn - a = tvm.nd.array(np.random.uniform(size=(n + base, stride)).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.uniform( + size=(n + base, stride)).astype(A.dtype), ctx) c = tvm.nd.array(np.zeros((n, stride), dtype=C.dtype), ctx) f(a, c) tvm.testing.assert_allclose( @@ -329,6 +335,7 @@ def check_llvm(): c.asnumpy(), a.asnumpy() + 1 + 1) check_llvm() + @tvm.testing.requires_llvm def test_multiple_func(): nn = 1024 @@ -340,6 +347,7 @@ def test_multiple_func(): xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) + def check_llvm(): # build two functions f2 = tvm.lower(s, [A, B, C], name="fadd1") @@ -363,12 +371,12 @@ def check_llvm(): check_llvm() - @tvm.testing.requires_llvm def test_llvm_condition(): def check_llvm(n, offset): A = te.placeholder((n, ), name='A') - C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C') + C = te.compute((n,), lambda i: tvm.tir.if_then_else( + i >= offset, A[i], 0.0), name='C') s = te.create_schedule(C.op) # build and invoke the kernel. f = tvm.build(s, [A, C], "llvm") @@ -393,7 +401,8 @@ def check_llvm(n): f = tvm.build(s, [A, C], "llvm") ctx = tvm.cpu(0) # launch the kernel. - a = tvm.nd.array(np.random.randint(0, 2, size=(n,)).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.randint( + 0, 2, size=(n,)).astype(A.dtype), ctx) c = tvm.nd.empty((n,), C.dtype, ctx) f(a, c) c_np = a.asnumpy() == 1 @@ -407,14 +416,15 @@ def check_llvm(n): A = te.placeholder((n, ), name='A') scale = te.placeholder((), name='scale') k = te.reduce_axis((0, n), name="k") - C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") - D = te.compute((), lambda : C() + 1) + C = te.compute((), lambda: te.sum(A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda: C() + 1) s = te.create_schedule(D.op) # build and invoke the kernel. f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) # launch the kernel. 
- a = tvm.nd.array(np.random.randint(0, 2, size=(n,)).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.randint( + 0, 2, size=(n,)).astype(A.dtype), ctx) sc = tvm.nd.array( np.random.randint(0, 2, size=()).astype(scale.dtype), ctx) d = tvm.nd.empty((), D.dtype, ctx) @@ -423,6 +433,7 @@ def check_llvm(n): tvm.testing.assert_allclose(d.asnumpy(), d_np) check_llvm(64) + @tvm.testing.requires_llvm def test_rank_zero_bound_checkers(): def check_llvm(n): @@ -430,14 +441,16 @@ def check_llvm(n): A = te.placeholder((n, ), name='A') scale = te.placeholder((), name='scale') k = te.reduce_axis((0, n), name="k") - C = te.compute((), lambda : te.sum(A[k] * scale(), axis=k), name="C") - D = te.compute((), lambda : C() + 1) + C = te.compute((), lambda: te.sum( + A[k] * scale(), axis=k), name="C") + D = te.compute((), lambda: C() + 1) s = te.create_schedule(D.op) # build and invoke the kernel. f = tvm.build(s, [A, scale, D], "llvm") ctx = tvm.cpu(0) # launch the kernel. - a = tvm.nd.array(np.random.randint(0, 2, size=(n,)).astype(A.dtype), ctx) + a = tvm.nd.array(np.random.randint( + 0, 2, size=(n,)).astype(A.dtype), ctx) sc = tvm.nd.array( np.random.randint(0, 2, size=()).astype(scale.dtype), ctx) d = tvm.nd.empty((), D.dtype, ctx) @@ -499,16 +512,19 @@ def check(start, end, dstart, dend, dtype, floor_div=False): A = te.placeholder((end - start + 1,), name="A", dtype=dtype) B = te.placeholder((dend - dstart + 1,), name="B", dtype=dtype) # We clip values with min and max so that simplifiers know the ranges of values - clipa = lambda x: tvm.te.min(tvm.tir.const(end, dtype), tvm.te.max(tvm.tir.const(start, dtype), x)) - clipb = lambda x: tvm.te.min(tvm.tir.const(dend, dtype), tvm.te.max(tvm.tir.const(dstart, dtype), x)) + + def clipa(x): return tvm.te.min(tvm.tir.const(end, dtype), + tvm.te.max(tvm.tir.const(start, dtype), x)) + def clipb(x): return tvm.te.min(tvm.tir.const(dend, dtype), + tvm.te.max(tvm.tir.const(dstart, dtype), x)) # If the range is just a single point, use the constant itself if start == end: - clipa = lambda x: tvm.tir.const(start, dtype) + def clipa(x): return tvm.tir.const(start, dtype) if dstart == dend: - clipb = lambda x: tvm.tir.const(dstart, dtype) + def clipb(x): return tvm.tir.const(dstart, dtype) # D are division results and M are modulo results [D, M] = te.compute((end - start + 1, dend - dstart + 1), - lambda i, j: (div(clipa(A[i]), clipb(B[j])), + lambda i, j: (div(clipa(A[i]), clipb(B[j])), mod(clipa(A[i]), clipb(B[j])))) s = te.create_schedule([D.op, M.op]) @@ -557,29 +573,31 @@ def _show_info(): _show_info() raise AssertionError("Incorrect division result: {}({}, {}) is {} " "but should be {}".format(div.__name__, i, j, - D_arr[i - start, j - dstart], + D_arr[i - start, + j - dstart], dref)) if M_arr[i - start, j - dstart] != mref: _show_info() raise AssertionError("Incorrect modulo result: {}({}, {}) is {} " "but should be {}".format(mod.__name__, i, j, - M_arr[i - start, j - dstart], + M_arr[i - start, + j - dstart], mref)) # Try different ranges to cover different cases for start, end in [(-12, -12), (-11, -1), (-11, 0), (0, 0), - ( 12, 12), ( 1, 11), ( 0, 11), (-11, 11)]: + (12, 12), (1, 11), (0, 11), (-11, 11)]: for dstart, dend in [(-11, -1), (-11, 0), (-4, -4), (-2, -2), - ( 1, 11), ( 0, 11), ( 4, 4), ( 2, 2), (-11, 11)]: - if end < start or dend < dstart or (dend == 0 and dstart == 0): - continue - check(start, end, dstart, dend, 'int32', floor_div=False) - check(start, end, dstart, dend, 'int32', floor_div=True) - check(start, end, dstart, dend, 'int8', 
floor_div=False) - check(start, end, dstart, dend, 'int8', floor_div=True) - if start >= 0 and dstart >= 0: - check(start, end, dstart, dend, 'uint32', floor_div=False) - check(start, end, dstart, dend, 'uint32', floor_div=True) + (1, 11), (0, 11), (4, 4), (2, 2), (-11, 11)]: + if end < start or dend < dstart or (dend == 0 and dstart == 0): + continue + check(start, end, dstart, dend, 'int32', floor_div=False) + check(start, end, dstart, dend, 'int32', floor_div=True) + check(start, end, dstart, dend, 'int8', floor_div=False) + check(start, end, dstart, dend, 'int8', floor_div=True) + if start >= 0 and dstart >= 0: + check(start, end, dstart, dend, 'uint32', floor_div=False) + check(start, end, dstart, dend, 'uint32', floor_div=True) # Additional tests for uint8 for dstart, dend in [(0, 11), (1, 11), (2, 2), (4, 4)]: @@ -588,11 +606,12 @@ def _show_info(): check(0, 255, dstart, dend, 'uint8', floor_div=False) check(0, 255, dstart, dend, 'uint8', floor_div=True) + @tvm.testing.requires_llvm def test_llvm_fp_math(): def check_llvm_reciprocal(n): A = te.placeholder((n,), name='A') - B = te.compute((n,), lambda i: te.div(1.0,(1e+37*A[i])), name='B') + B = te.compute((n,), lambda i: te.div(1.0, (1e+37*A[i])), name='B') s = te.create_schedule(B.op) f = tvm.build(s, [A, B], "llvm") @@ -634,6 +653,7 @@ def test_dwarf_debug_information(): xo, xi = s[C].split(C.op.axis[0], factor=4) s[C].parallel(xo) s[C].vectorize(xi) + def check_llvm_object(): if tvm.target.codegen.llvm_version_major() < 5: return @@ -705,12 +725,15 @@ def test_llvm_shuffle(): def my_vectorize(): def vectorizer(op): store = op.body - idx = tvm.tir.Ramp(tvm.tir.const(0, 'int32'), tvm.tir.const(1, 'int32'), 8) + idx = tvm.tir.Ramp(tvm.tir.const(0, 'int32'), + tvm.tir.const(1, 'int32'), 8) all_ones = tvm.tir.const(1, 'int32x8') value = store.value - b_idx = tvm.tir.Shuffle([idx], [tvm.tir.const(i, 'int32') for i in range(7, -1, -1)]) + b_idx = tvm.tir.Shuffle( + [idx], [tvm.tir.const(i, 'int32') for i in range(7, -1, -1)]) new_a = tvm.tir.Load('int32x8', value.a.buffer_var, idx, all_ones) - new_b = tvm.tir.Load('int32x8', value.b.buffer_var, b_idx, all_ones) + new_b = tvm.tir.Load( + 'int32x8', value.b.buffer_var, b_idx, all_ones) value = new_a + new_b return tvm.tir.Store(store.buffer_var, new_a + new_b, idx, all_ones) @@ -727,7 +750,9 @@ def _transform(f, *_): b_ = tvm.nd.array(np.arange(8, 0, -1, dtype='int32')) c_ = tvm.nd.array(np.zeros((8, ), dtype='int32')) module(a_, b_, c_) - tvm.testing.assert_allclose(c_.asnumpy(), (a_.asnumpy() * 2).astype('int32')) + tvm.testing.assert_allclose( + c_.asnumpy(), (a_.asnumpy() * 2).astype('int32')) + def np_float2np_bf16(arr): ''' Convert a numpy array of float to a numpy array @@ -736,22 +761,26 @@ def np_float2np_bf16(arr): bias = np.bitwise_and(np.right_shift(orig, 16), 1) + 0x7FFF return np.right_shift(orig + bias, 16).astype('uint16') + def np_float2tvm_bf16(arr): ''' Convert a numpy array of float to a TVM array of bf16''' nparr = np_float2np_bf16(arr) return tvm.nd.empty(nparr.shape, 'uint16').copyfrom(nparr) + def np_bf162np_float(arr): ''' Convert a numpy array of bf16 (uint16) to a numpy array of float''' u32 = np.left_shift(arr.astype('uint32'), 16) return u32.view('= 0 - res = lower_intrin([x, y], tvm.tir.Select(y >= 0, tvm.te.floordiv(x, y), zero)) + res = lower_intrin([x, y], tvm.tir.Select( + y >= 0, tvm.te.floordiv(x, y), zero)) check_value(res, x, y, data, lambda a, b: a // b if b > 0 else 0) # involves max - res = lower_intrin([x, y], tvm.tir.Select(y >= 0, 
tvm.te.max(tvm.te.floordiv(x, y), zero), zero)) - check_value(res, x, y, data, lambda a, b: max(a // b, 0) if b > 0 else 0) + res = lower_intrin([x, y], tvm.tir.Select( + y >= 0, tvm.te.max(tvm.te.floordiv(x, y), zero), zero)) + check_value(res, x, y, data, lambda a, + b: max(a // b, 0) if b > 0 else 0) # lhs >= 0 - res = lower_intrin([x, y], tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floordiv(x, y), zero)) - check_value(res, x, y, data, lambda a, b: a // b if b > 0 and a >= 0 else 0) + res = lower_intrin([x, y], tvm.tir.Select( + tvm.tir.all(y >= 0, x >= 0), tvm.te.floordiv(x, y), zero)) + check_value(res, x, y, data, lambda a, b: a // + b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin([x, y], tvm.te.floordiv(x, tvm.tir.const(8, dtype=dtype))) - check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b) + res = lower_intrin([x, y], tvm.te.floordiv( + x, tvm.tir.const(8, dtype=dtype))) + check_value(res, x, y, [(a, b) + for a, b in data if b == 8], lambda a, b: a // b) @tvm.testing.requires_llvm @@ -102,15 +109,19 @@ def test_lower_floormod(): res = lower_intrin([x, y], tvm.te.floormod(x, y)) check_value(res, x, y, data, lambda a, b: a % b) # rhs >= 0 - res = lower_intrin([x, y], tvm.tir.Select(y >= 0, tvm.te.floormod(x, y), zero)) + res = lower_intrin([x, y], tvm.tir.Select( + y >= 0, tvm.te.floormod(x, y), zero)) check_value(res, x, y, data, lambda a, b: a % b if b > 0 else 0) # lhs >= 0 - res = lower_intrin([x, y], tvm.tir.Select(tvm.tir.all(y >= 0, x >= 0), tvm.te.floormod(x, y), zero)) - check_value(res, x, y, data, lambda a, b: a % b if b > 0 and a >= 0 else 0) + res = lower_intrin([x, y], tvm.tir.Select( + tvm.tir.all(y >= 0, x >= 0), tvm.te.floormod(x, y), zero)) + check_value(res, x, y, data, lambda a, b: a % + b if b > 0 and a >= 0 else 0) # const power of two - res = lower_intrin([x, y], tvm.te.floormod(x, tvm.tir.const(8, dtype=dtype))) - check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a % b) - + res = lower_intrin([x, y], tvm.te.floormod( + x, tvm.tir.const(8, dtype=dtype))) + check_value(res, x, y, [(a, b) + for a, b in data if b == 8], lambda a, b: a % b) if __name__ == "__main__": diff --git a/tests/python/unittest/test_tir_transform_lower_warp_memory.py b/tests/python/unittest/test_tir_transform_lower_warp_memory.py index eecc7f1bc4e9..38bf89cc4ab8 100644 --- a/tests/python/unittest/test_tir_transform_lower_warp_memory.py +++ b/tests/python/unittest/test_tir_transform_lower_warp_memory.py @@ -21,6 +21,7 @@ import numpy as np import tvm.testing + @tvm.testing.requires_cuda def test_lower_warp_memory_local_scope(): m = 128 @@ -38,17 +39,19 @@ def test_lower_warp_memory_local_scope(): xo, xi = s[AA].split(s[AA].op.axis[0], 32) s[AA].bind(xi, tx) - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") assert cuda_target.thread_warp_size == 32 mod = tvm.lower(s, [A, B], name="f") - mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", cuda_target))(mod) + mod = tvm.tir.transform.Apply( + lambda f: f.with_attr("target", cuda_target))(mod) fdevice = tvm.tir.transform.SplitHostDevice()(mod)["f_kernel0"] mod = tvm.IRModule.from_expr(fdevice) fdevice = tvm.tir.transform.LowerWarpMemory()(mod)["f_kernel0"] assert(fdevice.body.body.value.value == "local") assert(fdevice.body.body.body.extents[0].value == 2) + @tvm.testing.requires_cuda def test_lower_warp_memory_correct_indices(): n = 32 @@ -82,10 +85,11 @@ def test_lower_warp_memory_correct_indices(): # assessing 
different buffers, so there is no need to distinguish from elements, # and therefore threadIdx.y is NOT a index. idx_names = map(lambda x: x.name, - filter(lambda x: type(x) is tvm.tir.expr.Var, indices)) + filter(lambda x: type(x) is tvm.tir.expr.Var, indices)) assert "threadIdx.x" in idx_names assert "threadIdx.y" not in idx_names + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_lower_warp_memory_cuda_end_to_end(): @@ -96,9 +100,10 @@ def check_cuda(dtype): m = 128 A = te.placeholder((m,), name='A', dtype=dtype) - B = te.compute((m,), lambda i: A[i // 32 * 32 + (i + 1) % 32], name='B') + B = te.compute( + (m,), lambda i: A[i // 32 * 32 + (i + 1) % 32], name='B') - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") assert cuda_target.thread_warp_size == 32 with cuda_target: s = te.create_schedule(B.op) @@ -116,11 +121,11 @@ def check_cuda(dtype): func = tvm.build(s, [A, B], "cuda") A_np = np.array(list(range(m)), dtype=dtype) B_np = np.array( - list(range(1, 32)) + [0] + - list(range(33, 64)) + [32] + - list(range(65, 96)) + [64] + - list(range(97, 128)) + [96], - dtype=dtype) + list(range(1, 32)) + [0] + + list(range(33, 64)) + [32] + + list(range(65, 96)) + [64] + + list(range(97, 128)) + [96], + dtype=dtype) A_nd = tvm.nd.array(A_np, ctx) B_nd = tvm.nd.array(np.zeros(B_np.shape, dtype=B_np.dtype), ctx) func(A_nd, B_nd) @@ -129,6 +134,7 @@ def check_cuda(dtype): check_cuda("float32") check_cuda("float16") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_lower_warp_memory_cuda_half_a_warp(): @@ -141,7 +147,7 @@ def check_cuda(dtype): A = te.placeholder((n, m,), name='A', dtype=dtype) B = te.compute((n, m,), lambda j, i: A[j, (i + 1) % m], name='B') - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") assert cuda_target.thread_warp_size == 2 * m with cuda_target: s = te.create_schedule(B.op) @@ -161,8 +167,10 @@ def check_cuda(dtype): ctx = tvm.gpu(0) func = tvm.build(s, [A, B], "cuda") - A_np = np.array([list(range(i, m + i)) for i in range(n)], dtype=dtype) - B_np = np.array([list(range(1 + i, m + i)) + [i] for i in range(n)], dtype=dtype) + A_np = np.array([list(range(i, m + i)) + for i in range(n)], dtype=dtype) + B_np = np.array([list(range(1 + i, m + i)) + [i] + for i in range(n)], dtype=dtype) A_nd = tvm.nd.array(A_np, ctx) B_nd = tvm.nd.array(np.zeros(B_np.shape, dtype=B_np.dtype), ctx) func(A_nd, B_nd) @@ -171,6 +179,7 @@ def check_cuda(dtype): check_cuda("float32") check_cuda("float16") + @tvm.testing.requires_gpu @tvm.testing.requires_cuda def test_lower_warp_memory_cuda_2_buffers(): @@ -182,9 +191,10 @@ def check_cuda(dtype): m = 32 A = te.placeholder((m,), name='A', dtype=dtype) B = te.placeholder((m,), name='B', dtype=dtype) - C = te.compute((m,), lambda i: A[(i + 1) % m] + B[(i + 1) % m], name='C') + C = te.compute((m,), lambda i: A[(i + 1) % + m] + B[(i + 1) % m], name='C') - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") assert m <= cuda_target.thread_warp_size with cuda_target: s = te.create_schedule(C.op) @@ -218,13 +228,14 @@ def check_cuda(dtype): check_cuda("float32") check_cuda("float16") + @tvm.testing.requires_gpu def test_lower_warp_memory_roundup(): def check(device, m): A = te.placeholder((m,), name='A') B = te.compute((m,), lambda i: A[i] + 1, name='B') - with tvm.target.create(device): + with tvm.target.Target(device): s = te.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=32) tx = te.thread_axis("threadIdx.x") @@ 
-248,7 +259,7 @@ def check(device, m): for device in ['cuda', 'rocm']: if not tvm.testing.device_enabled(device): - print("skip because", device,"is not enabled..") + print("skip because", device, "is not enabled..") continue check(device, m=31) check(device, m=32) @@ -257,6 +268,7 @@ def check(device, m): check(device, m=64) check(device, m=65) + if __name__ == "__main__": test_lower_warp_memory_local_scope() test_lower_warp_memory_correct_indices() diff --git a/tests/python/unittest/test_tir_transform_make_packed_api.py b/tests/python/unittest/test_tir_transform_make_packed_api.py index 161745c68e7d..4797eea28c49 100644 --- a/tests/python/unittest/test_tir_transform_make_packed_api.py +++ b/tests/python/unittest/test_tir_transform_make_packed_api.py @@ -33,7 +33,7 @@ def test_makeapi(): mod = tvm.tir.transform.StorageFlatten(64)(mod) mod = tvm.tir.transform.Apply( lambda f: f.with_attr({ - "target": tvm.target.create("llvm"), + "target": tvm.target.Target("llvm"), "global_symbol": "main", }))(mod) diff --git a/tests/python/unittest/test_tir_transform_thread_sync.py b/tests/python/unittest/test_tir_transform_thread_sync.py index 75b3193ecf3f..e87302cbc0e9 100644 --- a/tests/python/unittest/test_tir_transform_thread_sync.py +++ b/tests/python/unittest/test_tir_transform_thread_sync.py @@ -18,6 +18,7 @@ from tvm import te import tvm.testing + @tvm.testing.requires_cuda def test_thread_storage_sync(): m = te.size_var('m') @@ -41,18 +42,18 @@ def test_thread_storage_sync(): mod = tvm.IRModule.from_expr(func) mod = tvm.tir.transform.StorageFlatten(64)(mod._move()) - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") mod = tvm.tir.transform.Apply(lambda f: f.with_attr({ - "global_symbol": "test", "target": cuda_target}))(mod._move()) + "global_symbol": "test", "target": cuda_target}))(mod._move()) fdevice = tvm.tir.transform.SplitHostDevice()(mod)["test_kernel0"] mod = tvm.IRModule.from_expr(fdevice) - cuda_target = tvm.target.create("cuda") + cuda_target = tvm.target.Target("cuda") f = tvm.tir.transform.ThreadSync("shared")(mod)["test_kernel0"] body_list = tvm.tir.stmt_list(f.body.body.body.body) - assert(body_list[1].value.op.same_as(tvm.ir.Op.get("tir.tvm_storage_sync"))) - + assert(body_list[1].value.op.same_as( + tvm.ir.Op.get("tir.tvm_storage_sync"))) if __name__ == "__main__": diff --git a/tutorials/autotvm/tune_conv2d_cuda.py b/tutorials/autotvm/tune_conv2d_cuda.py index 904315109cba..07f7f9af7b57 100644 --- a/tutorials/autotvm/tune_conv2d_cuda.py +++ b/tutorials/autotvm/tune_conv2d_cuda.py @@ -213,7 +213,7 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding): # apply history best from log file with autotvm.apply_history_best('conv2d.log'): - with tvm.target.create("cuda"): + with tvm.target.Target("cuda"): s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding) func = tvm.build(s, arg_bufs) diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py index d7529b2d109a..71b1d3e4ffbd 100644 --- a/tutorials/autotvm/tune_relay_arm.py +++ b/tutorials/autotvm/tune_relay_arm.py @@ -190,7 +190,7 @@ def get_network(name, batch_size): # Replace "aarch64-linux-gnu" with the correct target of your board. # This target is used for cross compilation. You can query it by :code:`gcc -v` on your device. 
-target = tvm.target.create('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu') +target = tvm.target.Target('llvm -device=arm_cpu -mtriple=aarch64-linux-gnu') # Also replace this with the device key in your tracker device_key = 'rk3399' diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py index 5f5e523cfe7a..27e4bd6393a7 100644 --- a/tutorials/autotvm/tune_relay_mobile_gpu.py +++ b/tutorials/autotvm/tune_relay_mobile_gpu.py @@ -187,7 +187,7 @@ def get_network(name, batch_size): #### DEVICE CONFIG #### -target = tvm.target.create('opencl -device=mali') +target = tvm.target.Target('opencl -device=mali') # Replace "aarch64-linux-gnu" with the correct target of your board. # This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device. diff --git a/tutorials/autotvm/tune_simple_template.py b/tutorials/autotvm/tune_simple_template.py index c5a3843df659..fd22eecd75a6 100644 --- a/tutorials/autotvm/tune_simple_template.py +++ b/tutorials/autotvm/tune_simple_template.py @@ -312,7 +312,7 @@ def matmul(N, L, M, dtype): # apply history best from log file with autotvm.apply_history_best('matmul.log'): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): s, arg_bufs = matmul(N, L, M, 'float32') func = tvm.build(s, arg_bufs) diff --git a/tutorials/dev/use_pass_infra.py b/tutorials/dev/use_pass_infra.py index 4b842b90995e..0452801d8665 100644 --- a/tutorials/dev/use_pass_infra.py +++ b/tutorials/dev/use_pass_infra.py @@ -191,7 +191,7 @@ def alter_conv2d(attrs, inputs, tinfos, out_type): seq1 = tvm.transform.Sequential([relay.transform.AlterOpLayout()]) with tvm.transform.PassContext(opt_level=3): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): mod5 = seq1(mod) print(mod5) @@ -264,7 +264,7 @@ def print_ir(mod, info, is_before): print(mod) with tvm.transform.PassContext(opt_level=3, trace=print_ir): - with tvm.target.create("llvm"): + with tvm.target.Target("llvm"): # Perform the optimizations. 
mod = seq(mod) print(mod) diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py index c9174ad42f38..4a88a74ecc35 100644 --- a/tutorials/frontend/deploy_model_on_rasp.py +++ b/tutorials/frontend/deploy_model_on_rasp.py @@ -173,11 +173,11 @@ def transform_image(image): local_demo = True if local_demo: - target = tvm.target.create('llvm') + target = tvm.target.Target('llvm') else: target = tvm.target.arm_cpu('rasp3b') # The above line is a simple form of - # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -mtriple=armv7l-linux-gnueabihf -mattr=+neon') + # target = tvm.target.Target('llvm -device=arm_cpu -model=bcm2837 -mtriple=armv7l-linux-gnueabihf -mattr=+neon') with tvm.transform.PassContext(opt_level=3): lib = relay.build(func, target, params=params) diff --git a/tutorials/language/tedd.py b/tutorials/language/tedd.py index 7edcde99575a..6d2203729f4d 100644 --- a/tutorials/language/tedd.py +++ b/tutorials/language/tedd.py @@ -62,7 +62,7 @@ W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W') B = te.placeholder((1, num_filter, 1), name='bias') -with tvm.target.create("llvm"): +with tvm.target.Target("llvm"): t_conv = topi.nn.conv2d_hwcn(A, W, stride, padding, dilation) t_bias = topi.add(t_conv, B) t_relu = topi.nn.relu(t_bias) diff --git a/tutorials/optimize/opt_matmul_auto_tensorcore.py b/tutorials/optimize/opt_matmul_auto_tensorcore.py index 30be72316e31..7ca04a6cfd8d 100644 --- a/tutorials/optimize/opt_matmul_auto_tensorcore.py +++ b/tutorials/optimize/opt_matmul_auto_tensorcore.py @@ -284,7 +284,7 @@ def tune_and_evaluate(M, N, L, dtype, layout): print("\nBest config:") print(best_config) with autotvm.apply_history_best('matmul.log'): - with tvm.target.create("cuda"): + with tvm.target.Target("cuda"): s, arg_bufs = test_gemm(N, L, M, dtype, layout) print(tvm.lower(s, arg_bufs, simple_mode=True)) func = tvm.build(s, arg_bufs) diff --git a/tutorials/topi/intro_topi.py b/tutorials/topi/intro_topi.py index 5938b692119c..82d789250e1c 100644 --- a/tutorials/topi/intro_topi.py +++ b/tutorials/topi/intro_topi.py @@ -113,7 +113,7 @@ # tarray = te.placeholder((512, 512), name="tarray") softmax_topi = topi.nn.softmax(tarray) -with tvm.target.create("cuda"): +with tvm.target.Target("cuda"): sst = topi.cuda.schedule_softmax(softmax_topi) print(tvm.lower(sst, [tarray], simple_mode=True)) @@ -133,7 +133,7 @@ data = te.placeholder((1, 3, 224, 224)) kernel = te.placeholder((10, 3, 5, 5)) -with tvm.target.create("cuda"): +with tvm.target.Target("cuda"): conv = topi.cuda.conv2d_nchw(data, kernel, 1, 2, 1) out = topi.nn.relu(conv) sconv = topi.cuda.schedule_conv2d_nchw([out])
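
For reference, a minimal sketch of the constructor-based target creation that the updated tests and tutorials above exercise. This is an illustrative sketch only, not part of the patch: it uses only calls that appear in the hunks above (string-form construction, attribute access such as `thread_warp_size`, the target scope, and `tvm.target.Target.current()`), and assumes a TVM build with the LLVM and CUDA backends enabled.

    import tvm

    # Construct targets directly from a target string, replacing tvm.target.create().
    llvm_target = tvm.target.Target("llvm")
    arm_target = tvm.target.Target("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu")

    # Attributes parsed from the target string are available on the object,
    # e.g. the warp size asserted in the lower_warp_memory tests.
    cuda_target = tvm.target.Target("cuda")
    assert cuda_target.thread_warp_size == 32

    # Target objects also act as scopes, replacing `with tvm.target.create(...)`.
    with cuda_target:
        # Scheduling/build code here can query tvm.target.Target.current(),
        # as the autotvm templates above do.
        pass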