diff --git a/python/tvm/contrib/ethosu/cascader/__init__.py b/python/tvm/contrib/ethosu/cascader/__init__.py
index 03753b4049bb..f151fa9bc418 100644
--- a/python/tvm/contrib/ethosu/cascader/__init__.py
+++ b/python/tvm/contrib/ethosu/cascader/__init__.py
@@ -34,3 +34,6 @@
 )
 from .parts import InlinePart, EthosuPart
 from .device_config import EthosuDeviceConfig
+from .tensor_config import TensorConfigState, MemoryRegion, TensorConfig
+from .plan import Plan
+from .cascader_options import CascaderOptions
diff --git a/python/tvm/contrib/ethosu/cascader/cascader_options.py b/python/tvm/contrib/ethosu/cascader/cascader_options.py
new file mode 100644
index 000000000000..ff831ff37990
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/cascader_options.py
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Object to hold options for the NPU cascader"""
+import tvm._ffi
+
+from tvm.runtime import Object
+
+from . import _ffi_api
+from .tensor_config import MemoryRegion
+
+
+@tvm._ffi.register_object("contrib.ethosu.cascader.CascaderOptions")
+class CascaderOptions(Object):
+    """
+    A class to hold configuration options for the cascader.
+
+    Attributes
+    ----------
+    cascade_region : MemoryRegion
+        The MemoryRegion to place cascading buffers into.
+    max_proposals : int
+        The maximum number of Proposals to generate.
+    stripe_factors : int
+        How many striping factors to try per axis.
+    max_plan_size : int
+        The maximum number of Parts in a Plan.
+    always_copy_size : int
+        The maximum size of a Tensor that will always be copied into the cascade region.
+
+    """
+
+    def __init__(
+        self,
+        cascade_region: MemoryRegion,
+        max_proposals: int,
+        stripe_factors: int,
+        max_plan_size: int,
+        always_copy_size: int,
+    ):
+        self.__init_handle_by_constructor__(
+            _ffi_api.CascaderOptions,
+            cascade_region,
+            max_proposals,
+            stripe_factors,
+            max_plan_size,
+            always_copy_size,
+        )
diff --git a/python/tvm/contrib/ethosu/cascader/pareto.py b/python/tvm/contrib/ethosu/cascader/pareto.py
new file mode 100644
index 000000000000..3c4dcbc88a45
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/pareto.py
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Pareto optimisation functions for the NPU cascader."""
+from typing import List
+
+from tvm import Object
+
+from . import _ffi_api
+from .plan import Plan
+
+
+def _get_pareto_frontier(costs: List[List[float]]) -> List[bool]:
+    for i, cost in enumerate(costs):
+        for j, value in enumerate(cost):
+            costs[i][j] = float(value)
+
+    return [bool(v) for v in _ffi_api.GetParetoFrontier(costs)]
+
+
+def _thin_vector(vec: List[Object], max_size: int) -> List[Object]:
+    return list(_ffi_api.ThinVector(vec, max_size))
+
+
+def _pareto_cull_plans(plans: List[Plan], max_plans: int) -> List[Plan]:
+    return list(_ffi_api.ParetoCullPlans(plans, max_plans))
diff --git a/python/tvm/contrib/ethosu/cascader/plan.py b/python/tvm/contrib/ethosu/cascader/plan.py
new file mode 100644
index 000000000000..f960911ca133
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/plan.py
@@ -0,0 +1,167 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Plan class to hold subgraph scheduling information."""
+from typing import Dict, FrozenSet
+import tvm._ffi
+
+from tvm.runtime import Object
+
+from . import _ffi_api
+from .graph import Tensor, Part
+from .tensor_config import TensorConfig, MemoryRegion
+
+
+@tvm._ffi.register_object("contrib.ethosu.cascader.Plan")
+class Plan(Object):
+    """
+    A class which describes how to schedule a subgraph of Parts together.
+
+    A Plan takes the form of a subgraph of connected Parts (recorded in part_group) with
+    TensorConfigs for all of the required Tensors (recorded in tensor_configs). This information
+    can be used to produce a Tensor Expression schedule with inter-operator scheduling. A Plan is
+    necessarily single-output such that all non-output Parts are 'computed_at'ed the scope of the
+    output Part. This is what achieves the technique referred to as 'cascading'. A Plan also has
+    an interior memory region which specifies the region of memory into which all the Plans
+    intermediate buffers should be allocated.
+
+    Additionally, a Plan contains some other information used during the Plan generation and
+    selection algorithms. Both the memory and cycles required to run the Plan are accounted for so
+    that Plans can be ranked and Pareto-culled on these metrics. Furthermore, the TensorConfigs
+    which are 'open' is recorded indicating that these are valid points to merge with another Plan.
+    A Plan can only be turned into a schedule if it has no 'open' TensorConfigs - at which point
+    the Plan is said to be 'closed'.
+
+    Attributes
+    ----------
+    tensor_configs : Dict[Tensor, TensorConfig]
+        The TensorConfigs specified by the Plan.
+    open_configs : FrozenSet[TensorConfig]
+        The TensorConfigs which are 'open' meaning they are a Plan input/output but have
+        'interior' state.
+    output_config : TensorConfig
+        The TensorConfig of the Plan's output tensor.
+    part_group : FrozenSet[Part]
+        The Parts which are covered by the Plan.
+    interior_region : MemoryRegion
+        The MemoryRegion in which to store 'interior' Plan buffers.
+    memory_usage : int
+        The interior memory used by the Plan in bytes.
+    cycles : int
+        The cycles taken to execute the Plan.
+
+    """
+
+    def __init__(
+        self,
+        tensor_configs: Dict[Tensor, TensorConfig],
+        open_configs: FrozenSet[TensorConfig],
+        output_config: TensorConfig,
+        part_group: FrozenSet[Part],
+        interior_region: MemoryRegion,
+        memory_usage: int,
+        cycles: int,
+    ):
+        self.__init_handle_by_constructor__(
+            _ffi_api.Plan,
+            list(tensor_configs.values()),
+            list(open_configs),
+            output_config,
+            list(part_group),
+            interior_region,
+            memory_usage,
+            cycles,
+        )
+
+    def merge(self, other):
+        """
+        Merge two Plans with share an 'open' TensorConfig.
+
+        The current Plan is referred to as the 'upper Plan' and the other Plan as the 'lower
+        Plan'. The 'open' output config of the upper Plan must be an 'open' input config of the
+        lower Plan. The Tensor referenced by these configs is the Tensor on which the two Plans
+        will be merged. The merge process does the following:
+
+        The tensor config maps will be merged with TensorConfigs from the upper Plan taking
+        priority. The open configs will be merged with the TensorConfigs that are being merged
+        having been removed. The output config will be that of the lower Plan. The part groups
+        will be merged. The interior region is necessarily the same for both the upper and lower
+        Plan. The cycles and memory usage will be summed.
+
+        Parameters
+        ----------
+        other : Plan
+            The Plan to merge with.
+
+        Return
+        ------
+        Plan
+            The merged Plan.
+
+        """
+        return _ffi_api.PlanMerge(self, other)
+
+    @property
+    def tensor_configs(self):
+        """The TensorConfigs specified by the Plan."""
+        tensor_configs = {}
+        for config in self._tensor_configs:
+            tensor_configs[config.tensor] = config
+        return tensor_configs
+
+    @property
+    def open_configs(self):
+        """
+        The TensorConfigs which are 'open' meaning they are a Plan input/output but have
+        'interior' state.
+        """
+        return frozenset(self._open_configs)
+
+    @property
+    def output_config(self):
+        """The TensorConfig of the Plan's output tensor."""
+        return self._output_config
+
+    @property
+    def part_group(self):
+        """The Parts which are covered by the Plan."""
+        return frozenset(self._part_group)
+
+    @property
+    def interior_region(self):
+        """The MemoryRegion in which to store 'interior' Plan buffers."""
+        return self._interior_region
+
+    @property
+    def memory_usage(self):
+        """The interior memory used by the Plan in bytes."""
+        return self._memory_usage
+
+    @property
+    def cycles(self):
+        """The cycles taken to execute the Plan."""
+        return self._cycles
+
+    def __repr__(self):
+        return (
+            f"Plan(tensor_configs={self.tensor_configs}, "
+            f"open_configs={self.open_configs}, "
+            f"output_config={self.output_config}, "
+            f"part_group={self.part_group}, "
+            f"interior_region={self.interior_region.name}, "
+            f"memory_usage={self.memory_usage}, "
+            f"cycles={self.cycles}, "
+        )
diff --git a/python/tvm/contrib/ethosu/cascader/plan_generator.py b/python/tvm/contrib/ethosu/cascader/plan_generator.py
new file mode 100644
index 000000000000..36e0cf4420ea
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/plan_generator.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Algorithms to generate Plans for a CascaderGraph."""
+from typing import List, Dict
+
+from tvm.contrib.ethosu.cascader.tensor_config import MemoryRegion
+
+from . import _ffi_api
+from .cascader_options import CascaderOptions
+from .plan import Plan
+from .stripe_config import StripeConfig
+from .graph import CascaderGraph, Part, Tensor
+
+
+def _generate_output_stripe_configs(part: Part, stripe_factors: int) -> List[StripeConfig]:
+    return list(_ffi_api.GenerateOutputStripeConfigs(part, stripe_factors))
+
+
+def _generate_single_plans(
+    part: Part,
+    output_stripe_configs: List[StripeConfig],
+    home_map: Dict[Tensor, List[MemoryRegion]],
+    cascade_region: MemoryRegion,
+) -> List[Plan]:
+    return list(_ffi_api.GenerateSinglePlans(part, output_stripe_configs, home_map, cascade_region))
+
+
+def _generate_graph_plans(
+    graph: CascaderGraph,
+    home_map: Dict[Tensor, List[MemoryRegion]],
+    options: CascaderOptions,
+):
+    return _ffi_api.GenerateGraphPlans(
+        graph,
+        home_map,
+        options,
+    )
diff --git a/python/tvm/contrib/ethosu/cascader/tensor_config.py b/python/tvm/contrib/ethosu/cascader/tensor_config.py
new file mode 100644
index 000000000000..6787ea4f052e
--- /dev/null
+++ b/python/tvm/contrib/ethosu/cascader/tensor_config.py
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tensor config class to hold tensor scheduling information."""
+from typing import List, Union
+from enum import IntEnum
+import tvm._ffi
+from tvm.contrib.ethosu.cascader.stripe_config import StripeConfig
+
+from tvm.runtime import Object
+
+from . import _ffi_api
+from .stripe_config import StripeConfig
+from .graph import Tensor, BufferMode
+
+
+class TensorConfigState(IntEnum):
+    """
+    The 'state' of a TensorConfig as used in the Plan generation algorithm.
+
+    BOUNDARY - Should describe a Plan input/output Tensor.
+    INTERIOR - Should describe an intermediate Tensor in a 'closed' Plan.
+
+    """
+
+    BOUNDARY = 0
+    INTERIOR = 1
+
+
+@tvm._ffi.register_object("contrib.ethosu.cascader.MemoryRegion")
+class MemoryRegion(Object):
+    """
+    MemoryRegion class to store information about device memories.
+
+    Attributes
+    ----------
+    name : str
+        The name of the region.
+    size : int
+        The size of the region.
+    read_bandwidth : int
+        The read bandwidth of the region in bytes per cycle.
+    write_bandwidth : int
+        The write bandwidth of the region in bytes per cycle.
+
+    """
+
+    def __init__(self, name: str, size: int, read_bandwidth: int, write_bandwidth: int):
+        self.__init_handle_by_constructor__(
+            _ffi_api.MemoryRegion, name, size, read_bandwidth, write_bandwidth
+        )
+
+
+@tvm._ffi.register_object("contrib.ethosu.cascader.TensorConfig")
+class TensorConfig(Object):
+    """
+    A class which describes how to realize a Tensor.
+
+    The TensorConfig describes both how a Tensor is scheduled (the order in which it's
+    produced/consumed) and how its allocated in memory (which region it should reside in
+    and whether it should be copied).
+
+    Attributes
+    ----------
+    tensor : Tensor
+        The Tensor the config applies to.
+    home_region : MemoryRegion
+        The region where the tensor is allocated.
+    state : TensorConfigState
+        The state of the TensorConfig.
+
+        The TensorConfigState is only used as part of the Plan generation algorithm. For a Plan
+        to be 'closed' (and therefore not subject to any further merging), all the TensorConfigs
+        that describe Plan input or output Tensors must be in the 'BOUNDARY' state with the rest
+        being 'INTERIOR'. If any of the input or output tensors are described by an 'INTERIOR'
+        TensorConfig, then the Plan is 'open' and should be merged with other 'open' Plans until
+        the result becomes 'closed'.
+    buffer_mode : BufferMode
+        The mode in which the buffer should be realized.
+
+        There are multiple buffering strategies by which a tensor may be realized (computed).
+        These affect the amount of recomputation necessary as well as the size of buffer required
+        to store the tensor. See 'BufferMode' for a description of the allowable buffering modes.
+    stripe_configs : List[StringConfig]
+       The StripeConfigs with which to compute the tensor.
+
+       The StripeConfigs determine the order in which the elements of the tensor should be
+       computed, including potentially computing them multiple times (recompute). Multiple
+       StripeConfigs are used over just a single StripeConfig for the case where the tensor is
+       consumed by two different Parts executing themselves with different StripeConfigs. In this
+       case, there is a StripeConfig per consumer of the tensor.
+    copy_tensor : bool, optional
+        Whether to copy the tensor.
+
+        While a tensor will originally reside in its home region, the TensorConfig may optionally
+        specify that the tensor should be copied (according to the StripeConfigs) into another
+        MemoryRegion. As an example for where this may be used, if a weights tensor initially
+        resides in slow Flash memory then necessarily the home region will be Flash. However, if
+        the weights values are used multiple times by a Part, it may be more performant to choose
+        to copy the weights into a faster memory like SRAM.
+    copy_region : Union[MemoryRegion, None], optional
+        The region to copy the tensor to.
+
+    """
+
+    def __init__(
+        self,
+        tensor: Tensor,
+        home_region: MemoryRegion,
+        state: TensorConfigState,
+        buffer_mode: BufferMode,
+        stripe_configs: List[StripeConfig],
+        copy_tensor: bool = False,
+        copy_region: Union[MemoryRegion, None] = None,
+    ):
+        if copy_region is None:
+            copy_region = home_region
+        self.__init_handle_by_constructor__(
+            _ffi_api.TensorConfig,
+            tensor,
+            home_region,
+            state,
+            buffer_mode,
+            stripe_configs,
+            copy_tensor,
+            copy_region,
+        )
+
+    def get_buffer_size(self):
+        """
+        The size of the buffer needed for the TensorConfig.
+
+        The size of buffer necessary to store a tensor being produced using the TensorConfig is
+        not necessarily just the size of the tensor. In Plans, a tensor may be being produced and
+        consumed in 'stripes' which are smaller than the full tensor. Therefore, the buffer
+        necessary to store the tensor may only need to be as large as the stripe. The precise size
+        of the buffer will depend both on the BufferMode and StripeConfigs (as well as, of course,
+        the Tensor).
+
+        """
+        return _ffi_api.TensorConfigGetBufferSize(self)
+
+    @property
+    def tensor(self):
+        """The Tensor the config applies to."""
+        return self._tensor
+
+    @property
+    def home_region(self):
+        """The region where the tensor is allocated."""
+        return self._home_region
+
+    @property
+    def state(self):
+        """The state of the TensorConfig."""
+        return TensorConfigState(self._state)
+
+    @property
+    def buffer_mode(self):
+        """The mode in which the buffer should be realized."""
+        return BufferMode(self._buffer_mode)
+
+    @property
+    def stripe_configs(self):
+        """The StripeConfigs with which to compute the tensor."""
+        return list(self._stripe_configs)
+
+    @property
+    def copy_tensor(self):
+        """Whether to copy the tensor."""
+        return bool(self._copy_tensor)
+
+    @property
+    def copy_region(self):
+        """The region to copy the tensor to."""
+        return self._copy_region
+
+    def __hash__(self):
+        return self._hash
+
+    def __eq__(self, other):
+        return _ffi_api.TensorConfigEqual(self, other)
+
+    def __repr__(self):
+        return (
+            f"TensorConfig(tensor={self.tensor}, "
+            f"home_region={self.home_region.name}, "
+            f"state={self.state.name}, "
+            f"buffer_mode={self.buffer_mode.name}, "
+            f"stripe_configs={self.stripe_configs}, "
+            f"copy_tensor={self.copy_tensor}, "
+            f"copy_region={self.copy_region.name}"
+        )
diff --git a/src/contrib/ethosu/cascader/cascader_options.cc b/src/contrib/ethosu/cascader/cascader_options.cc
new file mode 100644
index 000000000000..fb4b07940e2c
--- /dev/null
+++ b/src/contrib/ethosu/cascader/cascader_options.cc
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "cascader_options.h"
+
+#include <utility>
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+void CascaderOptionsNode::VisitAttrs(AttrVisitor* v) {
+  v->Visit("cascade_region", &cascade_region);
+  v->Visit("max_proposals", &max_proposals);
+  v->Visit("stripe_factors", &stripe_factors);
+  v->Visit("max_plan_size", &max_plan_size);
+  v->Visit("always_copy_size", &always_copy_size);
+}
+
+CascaderOptions::CascaderOptions(const MemoryRegion& cascade_region, int max_proposals,
+                                 int stripe_factors, int max_plan_size, int always_copy_size) {
+  auto n = make_object<CascaderOptionsNode>();
+  n->cascade_region = std::move(cascade_region);
+  n->max_proposals = max_proposals;
+  n->stripe_factors = stripe_factors;
+  n->max_plan_size = max_plan_size;
+  n->always_copy_size = always_copy_size;
+  data_ = std::move(n);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.CascaderOptions")
+    .set_body_typed([](MemoryRegion cascade_region, int max_proposals, int stripe_factors,
+                       int max_plan_size, int always_copy_size) {
+      return CascaderOptions(cascade_region, max_proposals, stripe_factors, max_plan_size,
+                             always_copy_size);
+    });
+
+TVM_REGISTER_NODE_TYPE(CascaderOptionsNode);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/cascader_options.h b/src/contrib/ethosu/cascader/cascader_options.h
new file mode 100644
index 000000000000..135de784ad3c
--- /dev/null
+++ b/src/contrib/ethosu/cascader/cascader_options.h
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/cascader_options.h
+ * \brief Class to store configuration options for the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_
+
+#include <tvm/node/reflection.h>
+#include <tvm/runtime/object.h>
+
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+/*! \brief Node to represent CascaderOptions */
+class CascaderOptionsNode : public Object {
+ public:
+  void VisitAttrs(AttrVisitor* v);
+
+  /*! \brief The MemoryRegion to place cascading buffer into. */
+  MemoryRegion cascade_region;
+  /*! \brief The maximum number of Proposals to generate. */
+  int max_proposals;
+  /*! \brief How many striping factors to try per axis. */
+  int stripe_factors;
+  /*! \brief The maximum number of Parts in a Plan. */
+  int max_plan_size;
+  /*! \brief The maximum size of Tensor that will always be copied into the cascade region. */
+  int always_copy_size;
+
+  static constexpr const char* _type_key = "contrib.ethosu.cascader.CascaderOptions";
+  TVM_DECLARE_FINAL_OBJECT_INFO(CascaderOptionsNode, Object)
+};
+
+/*! \brief A class to hold configuration options for the cascader. */
+class CascaderOptions : public ObjectRef {
+ public:
+  CascaderOptions(const MemoryRegion& cascade_region, int max_proposals, int stripe_factors,
+                  int max_plan_size, int always_copy_size);
+
+  TVM_DEFINE_OBJECT_REF_METHODS(CascaderOptions, ObjectRef, CascaderOptionsNode);
+};
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_CASCADER_OPTIONS_H_
diff --git a/src/contrib/ethosu/cascader/pareto.cc b/src/contrib/ethosu/cascader/pareto.cc
new file mode 100644
index 000000000000..255719088cb0
--- /dev/null
+++ b/src/contrib/ethosu/cascader/pareto.cc
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "pareto.h"
+
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/map.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <array>
+#include <utility>
+#include <vector>
+
+#include "common.h"
+#include "plan.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+template <int N>
+std::vector<bool> GetParetoFrontier(const std::vector<std::array<float, N>>& costs) {
+  std::vector<bool> is_optimal(costs.size(), true);
+  for (size_t i = 0; i < costs.size(); i++) {
+    if (is_optimal[i]) {
+      for (size_t j = 0; j < costs.size(); j++) {
+        if (is_optimal[j]) {
+          bool optimal = false;
+          for (size_t k = 0; k < N; k++) {
+            if (costs[i][k] > costs[j][k]) {
+              optimal = true;
+              break;
+            }
+          }
+          is_optimal[j] = optimal;
+        }
+      }
+      is_optimal[i] = true;
+    }
+  }
+  return is_optimal;
+}
+
+template <class T>
+std::vector<T> ThinVector(const std::vector<T>& vec, size_t max_size) {
+  if (max_size < 1) {
+    return std::vector<T>();
+  }
+  if (vec.size() <= max_size || vec.size() == 0) {
+    return vec;
+  }
+  if (max_size == 1) {
+    return std::vector<T>{vec[0]};
+  }
+  std::vector<T> thin_vec;
+  float step = static_cast<float>(vec.size()) / static_cast<float>(max_size - 1);
+  for (float i = 0; i < vec.size() - 1; i += step) {
+    thin_vec.push_back(vec[static_cast<int>(i)]);
+  }
+  thin_vec.push_back(vec.back());
+  return thin_vec;
+}
+
+std::vector<Plan> ParetoCullPlans(std::vector<Plan> plans, size_t max_plans) {
+  if (plans.size() <= max_plans) {
+    return plans;
+  }
+  std::sort(plans.begin(), plans.end(), [](const Plan& a, const Plan& b) -> bool {
+    return a->GetMemoryUsage() < b->GetMemoryUsage();
+  });
+  std::vector<std::array<float, 2>> costs;
+  for (const auto& plan : plans) {
+    std::array<float, 2> cost = {static_cast<float>(plan->GetMemoryUsage()),
+                                 static_cast<float>(plan->GetCycles())};
+    costs.emplace_back(cost);
+  }
+  std::vector<bool> is_optimal = GetParetoFrontier<2>(costs);
+  std::vector<Plan> optimal_plans;
+  size_t i = 0;
+  for (bool optimal : is_optimal) {
+    if (optimal) {
+      optimal_plans.push_back(plans[i]);
+    }
+    i++;
+  }
+  if (optimal_plans.size() <= max_plans) {
+    return optimal_plans;
+  }
+  return ThinVector(optimal_plans, max_plans);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetParetoFrontier")
+    .set_body_typed([](Array<Array<FloatImm>> tcosts) {
+      std::vector<std::array<float, 2>> costs;
+      for (const auto& tcost : tcosts) {
+        ICHECK_EQ(tcost.size(), 2);
+        std::array<float, 2> point = {static_cast<float>(tcost[0]->value),
+                                      static_cast<float>(tcost[1]->value)};
+        costs.push_back(point);
+      }
+      Array<Bool> is_optimal;
+      for (bool opt : GetParetoFrontier<2>(costs)) {
+        is_optimal.push_back(Bool(opt));
+      }
+      return is_optimal;
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ThinVector")
+    .set_body_typed([](Array<ObjectRef> vec, int max_size) {
+      std::vector<ObjectRef> vvec(vec.begin(), vec.end());
+      return Array<ObjectRef>(ThinVector<ObjectRef>(vvec, max_size));
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.ParetoCullPlans")
+    .set_body_typed([](Array<Plan> plans, int max_size) {
+      std::vector<Plan> vplans(plans.begin(), plans.end());
+      return Array<Plan>(ParetoCullPlans(vplans, max_size));
+    });
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/pareto.h b/src/contrib/ethosu/cascader/pareto.h
new file mode 100644
index 000000000000..6b0cd4e294ac
--- /dev/null
+++ b/src/contrib/ethosu/cascader/pareto.h
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/pareto.h
+ * \brief Pareto optimisation functions for the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_
+
+#include <tvm/ir/expr.h>
+#include <tvm/runtime/container/array.h>
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+class Plan;
+
+/*!
+ * \brief Determine the Pareto optimal points.
+ * \param costs The points as a vector of N-dimensional costs.
+ * \return A vector that is true where a point is Pareto optimal and false otherwise.
+ */
+template <int N>
+std::vector<bool> GetParetoFrontier(const std::vector<std::array<float, N>>& costs);
+
+/*!
+ * \brief Evenly sample items from a vector to reduce its size.
+ * \param vec The vector to thin.
+ * \param max_size The maximum size of the thinned vector.
+ * \return The thinned vector.
+ */
+template <class T>
+std::vector<T> ThinVector(const std::vector<T>& vec, size_t max_size);
+
+/*!
+ * \brief Cull plans which are not Pareto optimal then thin them down.
+ * \param plans The plans to apply the Pareto culling to.
+ * \param max_plans The maximum number of plans after the culling.
+ * \return The culled plans.
+ * \note Plan Pareto-optimality is determined based upon a Plan's memory_usage
+ * and cycles.
+ */
+std::vector<Plan> ParetoCullPlans(std::vector<Plan> plans, size_t max_plans);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_PARETO_H_
diff --git a/src/contrib/ethosu/cascader/plan.cc b/src/contrib/ethosu/cascader/plan.cc
new file mode 100644
index 000000000000..173b3f9e8d20
--- /dev/null
+++ b/src/contrib/ethosu/cascader/plan.cc
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "plan.h"
+
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/map.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "graph.h"
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+void PlanNode::VisitAttrs(AttrVisitor* v) {
+  Array<TensorConfig> tmp_arr(tensor_configs_);
+  v->Visit("_tensor_configs", &tmp_arr);
+  Array<TensorConfig> tmp_cfgs(open_configs_.begin(), open_configs_.end());
+  v->Visit("_open_configs", &tmp_cfgs);
+  v->Visit("_output_config", &output_config_);
+  Array<Part> tmp_parts(part_group_.begin(), part_group_.end());
+  v->Visit("_part_group", &tmp_parts);
+  v->Visit("_interior_region", &interior_region_);
+  v->Visit("_memory_usage", &memory_usage_);
+  v->Visit("_cycles", &cycles_);
+}
+
+Plan::Plan(const std::vector<TensorConfig>& tensor_configs,
+           const std::vector<TensorConfig>& open_configs, const TensorConfig& output_config,
+           const std::vector<Part>& part_group, const MemoryRegion& interior_region,
+           int memory_usage, int cycles) {
+  auto n = make_object<PlanNode>();
+  n->tensor_configs_ = std::move(tensor_configs);
+  n->open_configs_ = std::move(open_configs);
+  n->output_config_ = std::move(output_config);
+  n->part_group_ = std::move(part_group);
+  n->interior_region_ = interior_region;
+  n->memory_usage_ = memory_usage;
+  n->cycles_ = cycles;
+  data_ = std::move(n);
+}
+
+Plan Plan::Merge(const Plan& other) const {
+  auto n = make_object<PlanNode>(*this->operator->());
+  n->tensor_configs_.insert(n->tensor_configs_.end(), other->tensor_configs_.begin(),
+                            other->tensor_configs_.end());
+  n->open_configs_.erase(
+      std::remove(n->open_configs_.begin(), n->open_configs_.end(), (*this)->output_config_),
+      n->open_configs_.end());
+  for (const auto& config : other->open_configs_) {
+    if (config->GetTensor() != (*this)->output_config_->GetTensor()) {
+      n->open_configs_.push_back(config);
+    }
+  }
+  n->output_config_ = other->output_config_;
+  n->part_group_.insert(n->part_group_.end(), other->part_group_.begin(), other->part_group_.end());
+  std::sort(n->part_group_.begin(), n->part_group_.end());
+  n->memory_usage_ += other->memory_usage_;
+  n->cycles_ += other->cycles_;
+  return Plan(n);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.Plan")
+    .set_body_typed([](Array<TensorConfig> tensor_configs, Array<TensorConfig> open_configs,
+                       TensorConfig output_config, Array<Part> part_group,
+                       MemoryRegion interior_region, int memory_usage, int cycles) {
+      std::vector<TensorConfig> vtensor_configs(tensor_configs.begin(), tensor_configs.end());
+      std::vector<TensorConfig> sopen_configs(open_configs.begin(), open_configs.end());
+      std::vector<Part> spart_group(part_group.begin(), part_group.end());
+      return Plan(vtensor_configs, sopen_configs, output_config, spart_group, interior_region,
+                  memory_usage, cycles);
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PlanMerge").set_body_method(&Plan::Merge);
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.PlanMergeBenchmark")
+    .set_body_typed([](Plan plan, Plan other, int repeats) {
+      for (int i = 0; i < repeats; i++) {
+        plan.Merge(other);
+      }
+      return plan.Merge(other);
+    });
+
+TVM_REGISTER_NODE_TYPE(PlanNode);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/plan.h b/src/contrib/ethosu/cascader/plan.h
new file mode 100644
index 000000000000..65efe98e4ff5
--- /dev/null
+++ b/src/contrib/ethosu/cascader/plan.h
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/plan.h
+ * \brief Plan object for the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_
+
+#include <tvm/node/reflection.h>
+#include <tvm/runtime/object.h>
+
+#include <functional>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "graph.h"
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+/*! \brief Node to represent a Plan */
+class PlanNode : public Object {
+ public:
+  void VisitAttrs(AttrVisitor* v);
+
+  /*! \return The TensorConfigs specified by the Plan */
+  const std::vector<TensorConfig>& GetTensorConfigs() const { return tensor_configs_; }
+  /*! \return The TensorConfigs which are 'open' meaning they are a Plan input/output but have
+   * INTERIOR state */
+  const std::vector<TensorConfig>& GetOpenConfigs() const { return open_configs_; }
+  /*! \return The TensorConfig of the Plan's output tensor */
+  const TensorConfig GetOutputConfig() const { return output_config_; }
+  /*! \return The Parts which are covered by the Plan */
+  const std::vector<Part>& GetPartGroup() const { return part_group_; }
+  /*! \return The memory region in which to store interior Plan buffers */
+  MemoryRegion const GetInteriorRegion() const { return interior_region_; }
+  /*!
+   * \return The interior memory used by the Plan in bytes.
+   * \note The interior memory usage is defined as being the memory required in the interior region
+   * to execute the Plan excluding input and output buffers.
+   */
+  int GetMemoryUsage() const { return memory_usage_; }
+  /*! \return The cycles taken to execute the Plan */
+  int GetCycles() const { return cycles_; }
+  /*! \return Whether the Plan is 'closed' meaning it has no 'open' TensorConfigs */
+  bool IsClosed() const { return open_configs_.size() == 0; }
+
+  static constexpr const char* _type_key = "contrib.ethosu.cascader.Plan";
+  TVM_DECLARE_FINAL_OBJECT_INFO(PlanNode, Object);
+
+ protected:
+  friend class Plan;
+
+  /*! \brief The TensorConfigs specified by the Plan */
+  std::vector<TensorConfig> tensor_configs_;
+  /*! \brief The TensorConfigs which are 'open' meaning they are a Plan input/output but have
+   * INTERIOR state */
+  std::vector<TensorConfig> open_configs_;
+  /*! \brief The TensorConfig of the Plan's output tensor */
+  TensorConfig output_config_;
+  /*! \brief The Parts which are covered by the Plan */
+  std::vector<Part> part_group_;
+  /*! \brief The memory region in which to store interior Plan buffers */
+  MemoryRegion interior_region_;
+  /*! \brief The interior memory used by the Plan in bytes */
+  int memory_usage_;
+  /*! \brief The cycles taken to execute the Plan */
+  int cycles_;
+};
+
+/*!
+ * \brief A class which describes how to schedule a subgraph of Parts together.
+ * \note A Plan takes the form of a subgraph of connected Parts (recorded in part_group) with
+ * TensorConfigs for all of the required Tensors (recorded in tensor_configs). This information can
+ * be used to produce a Tensor Expression schedule with inter-operator scheduling. A Plan is
+ * necessarily single-output such that all non-output Parts are 'computed_at'ed the scope of the
+ * output Part. This is what achieves the technique referred to as 'cascading'. A Plan also has an
+ * interior memory region which specifies the region of memory into which all the Plans intermediate
+ * buffers should be allocated.
+ *
+ * Additionally, a Plan contains some other information used during the Plan generation and
+ * selection algorithms. Both the memory and cycles required to run the Plan are accounted for so
+ * that Plans can be ranked and Pareto-culled on these metrics. Furthermore, the TensorConfigs which
+ * are 'open' is recorded indicating that these are valid points to merge with another Plan. A Plan
+ * can only be turned into a schedule if it has no 'open' TensorConfigs - at which point the Plan is
+ * said to be 'closed'.
+ */
+class Plan : public ObjectRef {
+ public:
+  Plan(const std::vector<TensorConfig>& tensor_configs,
+       const std::vector<TensorConfig>& open_configs, const TensorConfig& output_config,
+       const std::vector<Part>& part_group, const MemoryRegion& interior_region, int memory_usage,
+       int cycles);
+  /*!
+   * \brief Merge two Plans which share an 'open' TensorConfig.
+   * \param other The Plan to merge with.
+   * \return The merged Plan.
+   * \note The current Plan is referred to as the 'upper Plan' and the other Plan as the 'lower
+   * Plan'. The 'open' output config of the upper Plan must be an 'open' input config of the lower
+   * Plan. The Tensor referenced by these configs is the Tensor on which the two Plans will be
+   * merged. The merge process does the following:
+   *
+   * The tensor config maps will be merged with TensorConfigs from the upper Plan taking priority.
+   * The open configs will be merged with the TensorConfigs that are being merged having been
+   * removed. The output config will be that of the lower Plan. The part groups will be merged. The
+   * interior region is necessarily the same for both the upper and lower Plan. The cycles and
+   * memory usage will be summed.
+   */
+  Plan Merge(const Plan& other) const;
+
+  TVM_DEFINE_OBJECT_REF_METHODS(Plan, ObjectRef, PlanNode);
+};
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+// Hash functions TensorConfig and Part sets
+namespace std {
+
+using TensorConfigSet = std::vector<::tvm::contrib::ethosu::cascader::TensorConfig>;
+using PartSet = std::vector<::tvm::contrib::ethosu::cascader::Part>;
+
+template <>
+struct hash<TensorConfigSet> {
+  std::size_t operator()(const TensorConfigSet& tensor_config_set) const {
+    size_t seed = 0;
+    for (const auto& tensor_config : tensor_config_set) {
+      seed ^= hash<::tvm::contrib::ethosu::cascader::TensorConfig>()(tensor_config);
+    }
+    return seed;
+  }
+};
+
+template <>
+struct equal_to<TensorConfigSet> {
+  bool operator()(const TensorConfigSet& lhs, const TensorConfigSet& rhs) const {
+    std::unordered_set<::tvm::contrib::ethosu::cascader::TensorConfig> lh_set(lhs.begin(),
+                                                                              lhs.end());
+    std::unordered_set<::tvm::contrib::ethosu::cascader::TensorConfig> rh_set(rhs.begin(),
+                                                                              rhs.end());
+    return lh_set == rh_set;
+  }
+};
+
+template <>
+struct hash<PartSet> {
+  std::size_t operator()(const PartSet& part_set) const {
+    size_t seed = 0;
+    for (const auto& part : part_set) {
+      seed ^= tvm::runtime::ObjectHash()(part);
+    }
+    return seed;
+  }
+};
+
+template <>
+struct equal_to<PartSet> {
+  bool operator()(const PartSet& lhs, const PartSet& rhs) const { return lhs == rhs; }
+};
+
+}  // namespace std
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_PLAN_H_
diff --git a/src/contrib/ethosu/cascader/plan_generator.cc b/src/contrib/ethosu/cascader/plan_generator.cc
new file mode 100644
index 000000000000..9acffb7e9479
--- /dev/null
+++ b/src/contrib/ethosu/cascader/plan_generator.cc
@@ -0,0 +1,529 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "plan_generator.h"
+
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/map.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+#include <tvm/support/parallel_for.h>
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "cascader_options.h"
+#include "common.h"
+#include "graph.h"
+#include "pareto.h"
+#include "plan.h"
+#include "stripe_config.h"
+#include "tensor_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+template <class T>
+std::vector<std::vector<T>> EnumerateCombinations(std::vector<std::vector<T>> values) {
+  if (values.size() == 0) {
+    return values;
+  }
+  if (values.size() == 1) {
+    std::vector<std::vector<T>> combs;
+    for (const auto& value : values[0]) {
+      combs.push_back(std::vector<T>(1, value));
+    }
+    return combs;
+  }
+  auto combs = EnumerateCombinations(std::vector<std::vector<T>>(values.begin(), values.end() - 1));
+  std::vector<std::vector<T>> new_combs;
+  for (const auto& value : values.back()) {
+    for (const auto& comb : combs) {
+      auto new_comb = std::vector<T>(comb);
+      new_comb.push_back(value);
+      new_combs.push_back(new_comb);
+    }
+  }
+  return new_combs;
+}
+
+std::vector<bool> GetCascadableAxes(const Part& part) {
+  std::vector<bool> cascadable_axes(part->GetOutputTensor()->GetShape().size());
+  // Check all the propagators to see if an output axis is projected into any
+  // of the inputs. If it is, then that axis is cascadable.
+  for (const auto& propagator : part->GetPropagators()) {
+    auto transform = propagator->GetTransform();
+    for (size_t i = 0; i < transform.size(); i++) {
+      for (size_t j = 0; j < transform[0].size() - 1; j++) {
+        // An axis is projected if there's a non-zero element
+        // in the transform matrix
+        if (transform[i][j] != 0) {
+          cascadable_axes[j] = true;
+        }
+      }
+    }
+  }
+  return cascadable_axes;
+}
+
+std::vector<StripeConfig> GenerateOutputStripeConfigs(const Part& part, int stripe_factors) {
+  // If stripe_factors is <= 0, then we won't produce any StripeConfigs
+  if (stripe_factors <= 0) {
+    return std::vector<StripeConfig>();
+  }
+  // Work out the factors to divide by as inverse powers of 2.
+  // The last factor is always reserved to be '0' which will correspond to
+  // choosing a stripe size of 1 in the dimension. We always include this
+  // as it represents the most extreme striping choice that uses the least
+  // memory, so it is our choice of last resort.
+  // For example, if stripe_factors = 4 then the factors are 1, 1/2, 1/4, 0.
+  std::vector<float> factors;
+  for (size_t i = 0; i < static_cast<size_t>(stripe_factors) - 1; i++) {
+    factors.push_back(1.0f / (std::pow(2.0f, i)));
+  }
+  factors.push_back(0);
+  // Then use the factors to derive the possible ways to split each dimension
+  // into stripes. As an example, if an had extent 128 then by applying
+  // the factors derived above we get the following possible splits for that axis:
+  // 128, 64, 32, 1
+  std::vector<std::vector<int>> splits;
+  std::vector<int> output_shape = part->GetOutputTensor()->GetShape();
+  size_t output_dims = output_shape.size();
+  // Only bother striping along the axes which are cascadable
+  auto cascadable_axes = GetCascadableAxes(part);
+  for (size_t i = 0; i < output_dims; i++) {
+    auto axis = output_shape[i];
+    auto axis_align = part->GetStripeAlignHint()[i];
+    std::set<int> axis_splits;  // Note this is a set to remove duplicate splits
+    if (!cascadable_axes[i]) {
+      axis_splits.insert(axis);
+    } else {
+      for (float factor : factors) {
+        int split =
+            std::max(static_cast<int>(std::ceil(axis * factor / axis_align)), 1) * axis_align;
+        split = std::min(axis, split);
+        axis_splits.insert(split);
+      }
+    }
+    splits.push_back(std::vector<int>(axis_splits.begin(), axis_splits.end()));
+  }
+  // Now calculate all the possible combinations of splits for each dimension
+  // to give us all the possible stripe shapes. For example, if we had two axes
+  // both with possible splits in {128, 64, 32, 1}, the stripe shapes would be:
+  // (128, 128), (128, 64), (128, 32) ... (1, 64), (1, 32), (1, 1)
+  auto stripe_shapes = EnumerateCombinations<int>(splits);
+  auto offset = std::vector<int>(output_dims);
+  std::vector<StripeConfig> stripe_configs;
+  // Calculate the possible axis orderings such that each axis has the opportunity
+  // to be the 'outermost' axis (which is axis that is chosen for rolling).
+  std::vector<std::vector<int>> orders;
+  for (size_t i = 0; i < output_dims; i++) {
+    std::vector<int> order(output_dims);
+    for (size_t j = 0; j < output_dims; j++) {
+      order[j] = 1 + (j + i) % output_dims;
+    }
+    orders.push_back(order);
+  }
+  // Finally, create the StripeConfigs from the possible stripe shapes and orders
+  for (const auto& stripe_shape : stripe_shapes) {
+    std::vector<int> stripes;
+    std::vector<float> strides;
+    for (size_t i = 0; i < output_dims; i++) {
+      stripes.push_back(std::ceil(static_cast<float>(output_shape[i]) / stripe_shape[i]));
+      strides.push_back(static_cast<float>(stripe_shape[i]));  // strides = stripe_shape
+    }
+    // If the stripe shape equals the output shape (i.e. there's no striping), then
+    // the order doesn't matter, so just pick the first order and continue.
+    if (stripe_shape == output_shape) {
+      stripe_configs.push_back(
+          StripeConfig(stripe_shape, output_shape, strides, orders[0], stripes, offset));
+      continue;
+    }
+    for (const auto& order : orders) {
+      // Some logic to avoid having an axis be the 'outermost' if the stripe is full
+      // size in that axis. This would otherwise be a waste because we can't roll
+      // over an axis that hasn't been split.
+      bool skip = false;
+      for (size_t i = 0; i < output_dims; i++) {
+        if (order[i] == 1 && stripe_shape[i] == output_shape[i]) {
+          skip = true;
+          break;
+        }
+      }
+      if (skip) continue;
+      stripe_configs.push_back(
+          StripeConfig(stripe_shape, output_shape, strides, order, stripes, offset));
+    }
+  }
+  return stripe_configs;
+}
+
+std::vector<TensorConfig> GetPossibleInputConfigs(const StripeConfig& stripe_config,
+                                                  const Tensor& tensor,
+                                                  const std::vector<MemoryRegion>& home_regions,
+                                                  const CascaderOptions& options) {
+  std::vector<TensorConfig> configs;
+  for (const auto& home_region : home_regions) {
+    // Boundary configs
+    if (home_region == options->cascade_region || tensor->GetSize() > options->always_copy_size) {
+      configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY,
+                                     BufferMode::RECOMPUTE, {stripe_config}, false, home_region));
+    }
+    if (home_region != options->cascade_region) {
+      configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY,
+                                     BufferMode::ROLLING, {stripe_config}, true,
+                                     options->cascade_region));
+    }
+  }
+  if (!tensor->IsConstant()) {
+    // Interior configs
+    configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR,
+                                   BufferMode::RECOMPUTE, {stripe_config}, false,
+                                   options->cascade_region));
+    configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR,
+                                   BufferMode::ROLLING, {stripe_config}, false,
+                                   options->cascade_region));
+  }
+  return configs;
+}
+
+// Check whether a StripeConfig can be an output boundary config
+bool CanBound(const StripeConfig& stripe_config) {
+  // Determine whether the StripeConfig results in non-overlapping stripes
+  // which is the case when the stripe shape equals the strides
+  for (size_t i = 0; i < stripe_config->GetShape().size(); i++) {
+    // Check that the stripe shape and strides are equal
+    if (stripe_config->GetShape()[i] - stripe_config->GetStrides()[i] != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+std::vector<TensorConfig> GetPossibleOutputConfigs(const StripeConfig& stripe_config,
+                                                   const Tensor& tensor,
+                                                   const std::vector<MemoryRegion>& home_regions,
+                                                   const CascaderOptions& options) {
+  std::vector<TensorConfig> configs;
+  // Only StripeConfigs with non-overlapping stripes can be output boundary configs
+  if (CanBound(stripe_config)) {
+    for (const auto& home_region : home_regions) {
+      // Boundary configs
+      configs.push_back(TensorConfig(tensor, home_region, TensorConfigState::BOUNDARY,
+                                     BufferMode::RECOMPUTE, {stripe_config}, false, home_region));
+    }
+  }
+  // Interior configs
+  configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR,
+                                 BufferMode::RECOMPUTE, {stripe_config}, false,
+                                 options->cascade_region));
+  configs.push_back(TensorConfig(tensor, options->cascade_region, TensorConfigState::INTERIOR,
+                                 BufferMode::ROLLING, {stripe_config}, false,
+                                 options->cascade_region));
+  return configs;
+}
+
+int GetInteriorMemoryUsage(const std::vector<TensorConfig>& input_configs,
+                           const TensorConfig& output_config, const MemoryRegion& interior_region) {
+  int memory_usage = 0;
+  if (output_config->GetHomeRegion() == interior_region &&
+      output_config->GetState() == TensorConfigState::BOUNDARY) {
+    memory_usage += output_config->GetTensor()->GetSize();
+  }
+  for (const auto& input_config : input_configs) {
+    if (input_config->GetHomeRegion() == interior_region &&
+        input_config->GetState() == TensorConfigState::BOUNDARY) {
+      memory_usage += input_config->GetTensor()->GetSize();
+    } else if (input_config->GetHomeRegion() == interior_region ||
+               input_config->GetCopyRegion() == interior_region) {
+      memory_usage += input_config->GetBufferSize();
+    }
+  }
+  return memory_usage;
+}
+
+std::vector<Plan> GenerateSinglePlans(
+    const Part& part, const std::vector<StripeConfig>& output_stripe_configs,
+    const std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>&
+        home_map,
+    const CascaderOptions& options) {
+  std::vector<Plan> plans;
+  std::vector<Part> part_group{part};
+  // Create a selection of Plans per output_stripe_config
+  for (const auto& output_stripe_config : output_stripe_configs) {
+    // Calculate the input_stripe_configs
+    auto input_stripe_configs = part->CalculateInputStripeConfigs(output_stripe_config);
+    // From the input_stripe_configs, now derive all the possible input TensorConfigs
+    std::vector<std::vector<TensorConfig>> all_possible_input_configs;
+    size_t i = 0;
+    for (const auto& stripe_config : input_stripe_configs) {
+      Tensor tensor = part->GetInputTensors()[i];
+      all_possible_input_configs.push_back(
+          GetPossibleInputConfigs(stripe_config, tensor, home_map.at(tensor), options));
+      i++;
+    }
+    // Now work out all the possible combinations of input TensorConfigs
+    auto input_config_combinations =
+        EnumerateCombinations<TensorConfig>(all_possible_input_configs);
+    Tensor output_tensor = part->GetOutputTensor();
+    // Then determine the possible output TensorConfigs (no combinations here because there's only
+    // one output)
+    auto output_configs = GetPossibleOutputConfigs(output_stripe_config, output_tensor,
+                                                   home_map.at(output_tensor), options);
+    // Calculate the performance information for the output_stripe_config for both the recompute and
+    // rolling cases
+    PerformanceInfo rolling_perf =
+        part->GetPerformanceInfo(output_stripe_config, BufferMode::ROLLING);
+    PerformanceInfo recompute_perf =
+        part->GetPerformanceInfo(output_stripe_config, BufferMode::RECOMPUTE);
+    // For all the possible input TensorConfig combinations
+    for (const auto& input_configs : input_config_combinations) {
+      std::vector<TensorConfig> tensor_configs;
+      std::vector<TensorConfig> open_input_configs;
+      // Add the input TensorConfigs to the 'tensor_configs' and
+      // record which input TensorConfigs are 'open' (i.e. 'INTERIOR')
+      for (const auto& input_config : input_configs) {
+        tensor_configs.push_back(input_config);
+        if (input_config->GetState() == TensorConfigState::INTERIOR) {
+          open_input_configs.push_back(input_config);
+        }
+      }
+      for (const auto& output_config : output_configs) {
+        // Add the output TensorConfig to the tensor_configs and to
+        // the open configs (if it's 'INTERIOR')
+        tensor_configs.push_back(output_config);
+        std::vector<TensorConfig> open_configs = open_input_configs;
+        if (output_config->GetState() == TensorConfigState::INTERIOR) {
+          open_configs.push_back(output_config);
+        }
+        int bandwidth_cycles = 0;
+        int compute_cycles = 0;
+        int mem2mem_cycles = 0;
+
+        // Pick the correct performance info based on the BufferMode
+        PerformanceInfo perf_info;
+        if (output_config->GetBufferMode() == BufferMode::RECOMPUTE) {
+          perf_info = recompute_perf;
+        } else {
+          perf_info = rolling_perf;
+        }
+        // Calculate the bandwidth cycles by multiplying the bytes read/written by the
+        // bandwidth of the memories
+        for (size_t i = 0; i < input_configs.size(); i++) {
+          bandwidth_cycles +=
+              perf_info->read_bytes[i] / input_configs[i]->GetCopyRegion()->read_bandwidth;
+          if (input_configs[i]->DoCopy()) {
+            // This Tensor needs to be copied - Count stripes for this config
+            Tensor tensor = input_configs[i]->GetTensor();
+            for (const auto& stripe_config : input_configs[i]->GetStripeConfigs()) {
+              std::map<std::vector<int>, int> input_blocks = CountStripes(stripe_config, true);
+              for (const auto& block : input_blocks) {
+                int bytes_transferred = mul_reduce(block.first) * tensor->GetDataType().bytes() *
+                                        tensor->GetCompressionRatio() * block.second;
+                int read_cycles =
+                    bytes_transferred * input_configs[i]->GetHomeRegion()->read_bandwidth;
+                int write_cycles =
+                    bytes_transferred * input_configs[i]->GetCopyRegion()->write_bandwidth;
+                mem2mem_cycles += std::max(read_cycles, write_cycles);
+              }
+            }
+          }
+        }
+        bandwidth_cycles +=
+            perf_info->write_bytes / output_config->GetCopyRegion()->write_bandwidth;
+        compute_cycles = perf_info->compute_cycles;
+        // Take the max of compute and bandwidth cycles as we assume compute cycles
+        // can hide memory latency
+        int cycles = std::max(std::max(compute_cycles, bandwidth_cycles), mem2mem_cycles);
+        int memory_usage =
+            GetInteriorMemoryUsage(input_configs, output_config, options->cascade_region);
+        plans.push_back(Plan(tensor_configs, open_configs, output_config, part_group,
+                             options->cascade_region, memory_usage, cycles));
+      }
+    }
+  }
+  return plans;
+}
+
+std::unordered_map<std::vector<Part>, std::vector<Plan>> GenerateGraphPlans(
+    const CascaderGraph& graph,
+    const std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>&
+        home_map,
+    const CascaderOptions& options) {
+  ICHECK_GT(options->stripe_factors, 0)
+      << "stripe_factors = " << options->stripe_factors << ", but must be > 0";
+  ICHECK_GT(options->max_plan_size, 0)
+      << "max_plan_size = " << options->max_plan_size << ", but must be > 0";
+  // Define a map between the graph Tensors and possible StripeConfigs that the Tensor may be
+  // executed with
+  std::unordered_map<Tensor, std::set<StripeConfig>, ObjectPtrHash, ObjectPtrEqual>
+      stripe_configs_by_tensor;
+  // Define a map between a given open TensorConfig and all the Plans which provide it
+  std::unordered_map<TensorConfig, std::vector<Plan>> plans_by_config;
+  // Define a map between a group of connected Parts and all the closed plans covering them
+  std::unordered_map<std::vector<Part>, std::vector<Plan>> closed_plans;
+  // Define a nested map which indexes open plans by both Part group and the open TensorConfigs they
+  // provide. Note that we index in this way because Part group + open TensorConfigs combined
+  // defines a group of Plans which can be mutually Pareto culled. If we culled of Part group alone,
+  // we'd lose potentially valuable open Plans which could have gone on to be grown into Pareto
+  // optimal closed plans.
+  std::unordered_map<std::vector<Part>,
+                     std::unordered_map<std::vector<TensorConfig>, std::vector<Plan>>>
+      open_plans;
+  // Traverse the graph in a reverse topological order (should be enforced by GetPartOrder)
+  for (const auto& part : graph->GetPartOrder()) {
+    // First generate all the possible StripeConfigs for the Part assuming that it will become the
+    // output of a Plan. The number generated is a function of stripe_factors and the number of
+    // cascadable dimensions in the Part.
+    std::vector<StripeConfig> stripe_configs =
+        GenerateOutputStripeConfigs(part, options->stripe_factors);
+    // Check to see if the output Tensor is part of any existing open Plans
+    if (stripe_configs_by_tensor.find(part->GetOutputTensor()) != stripe_configs_by_tensor.end()) {
+      // If there are other open Plans which have this Part's output Tensor as an input, then
+      // additionally consider the StripeConfigs of those open TensorConfigs so that we have the
+      // option to merge into those open Plans.
+      const std::set<StripeConfig>& connecting_configs =
+          stripe_configs_by_tensor.at(part->GetOutputTensor());
+      std::copy(connecting_configs.begin(), connecting_configs.end(),
+                std::back_inserter(stripe_configs));
+    }
+    // Generate all the single Part Plans for the previously determined StripeConfigs
+    auto single_part_plans = GenerateSinglePlans(part, stripe_configs, home_map, options);
+    std::vector<Plan> plans;
+    for (const auto& partial_plan : single_part_plans) {
+      // If the output TensorConfig of the Plan is 'INTERIOR', then it must be merged with
+      // another open Plan
+      if (partial_plan->GetOutputConfig()->GetState() == TensorConfigState::INTERIOR) {
+        if (plans_by_config.find(partial_plan->GetOutputConfig()) != plans_by_config.end() &&
+            partial_plan->GetOutputConfig()->GetTensor()->GetConsumers().size() == 1) {
+          // Search for all the open Plans which require the same TensorConfig
+          const auto& join_plans = plans_by_config.at(partial_plan->GetOutputConfig());
+          for (const auto& join_plan : join_plans) {
+            // Only merge to form a new Plan if the resulting Plan size won't exceed the
+            // max_plan_size
+            if (join_plan->GetPartGroup().size() < static_cast<size_t>(options->max_plan_size)) {
+              if (partial_plan->GetMemoryUsage() + join_plan->GetMemoryUsage() <
+                  options->cascade_region->size) {
+                plans.push_back(partial_plan.Merge(join_plan));
+              }
+            }
+          }
+        }
+      } else {
+        // If the single Part Plan had a 'BOUNDARY' output TensorConfig, then it doesn't need
+        // merging and can stand on its own.
+        plans.push_back(partial_plan);
+      }
+    }
+    // For all the newly created Plans, update the various maps
+    std::unordered_set<std::vector<Part>> new_part_groups;
+    for (const auto& plan : plans) {
+      new_part_groups.insert(plan->GetPartGroup());
+      if (plan->IsClosed()) {
+        closed_plans[plan->GetPartGroup()].push_back(plan);
+      } else {
+        open_plans[plan->GetPartGroup()][plan->GetOpenConfigs()].push_back(plan);
+      }
+    }
+    // Now Pareto cull both the open and closed Plans to remove non-optimal Plans
+    // Additionally, once culled we update another two maps, the stripe_configs_by_tensor
+    // and plans_by_config maps.
+    for (const auto& part_group : new_part_groups) {
+      if (closed_plans.find(part_group) != closed_plans.end()) {
+        closed_plans[part_group] = ParetoCullPlans(closed_plans.at(part_group), 32);
+      }
+      for (const auto& it : open_plans[part_group]) {
+        auto pareto_plans = ParetoCullPlans(it.second, 8);
+        for (const auto& plan : pareto_plans) {
+          for (const auto& open_config : plan->GetOpenConfigs()) {
+            if (open_config != plan->GetOutputConfig()) {
+              for (const auto& stripe_config : open_config->GetStripeConfigs()) {
+                // Only add a StripeConfig if it contains for than one stripe
+                if (mul_reduce(stripe_config->GetStripes()) > 1) {
+                  stripe_configs_by_tensor[open_config->GetTensor()].insert(stripe_config);
+                }
+              }
+              plans_by_config[open_config].push_back(plan);
+            }
+          }
+        }
+      }
+    }
+  }
+  return closed_plans;
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateOutputStripeConfigs")
+    .set_body_typed([](Part part, int stripe_factors) {
+      if (stripe_factors < 0) {
+        return Array<StripeConfig>();
+      }
+      return Array<StripeConfig>(GenerateOutputStripeConfigs(part, stripe_factors));
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateSinglePlans")
+    .set_body_typed([](Part part, Array<StripeConfig> output_stripe_configs,
+                       Map<Tensor, Array<MemoryRegion>> home_map, CascaderOptions options) {
+      std::vector<StripeConfig> voutput_stripe_configs(output_stripe_configs.begin(),
+                                                       output_stripe_configs.end());
+      std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>
+          mhome_map;
+      for (const auto& it : home_map) {
+        std::vector<MemoryRegion> home_regions;
+        for (const auto& i : it.second) {
+          home_regions.push_back(i);
+        }
+        mhome_map[it.first] = home_regions;
+      }
+      return Array<Plan>(GenerateSinglePlans(part, voutput_stripe_configs, mhome_map, options));
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateGraphPlans")
+    .set_body_typed([](CascaderGraph graph, Map<Tensor, Array<MemoryRegion>> home_map,
+                       CascaderOptions options) {
+      std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>
+          mhome_map;
+      for (const auto& it : home_map) {
+        std::vector<MemoryRegion> home_regions;
+        for (const auto& i : it.second) {
+          home_regions.push_back(i);
+        }
+        mhome_map[it.first] = home_regions;
+      }
+      auto closed_plans = GenerateGraphPlans(graph, mhome_map, options);
+      Map<Array<Part>, Array<Plan>> tclosed_plans;
+      for (auto& it : closed_plans) {
+        Array<Part> part_arr(it.first.begin(), it.first.end());
+        Array<Plan> plan_arr(it.second);
+        tclosed_plans.Set(part_arr, plan_arr);
+      }
+      return tclosed_plans;
+    });
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/plan_generator.h b/src/contrib/ethosu/cascader/plan_generator.h
new file mode 100644
index 000000000000..947728addfd1
--- /dev/null
+++ b/src/contrib/ethosu/cascader/plan_generator.h
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/plan_generator.h
+ * \brief Algorithm to generate possible Plans in the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_
+
+#include <tvm/node/reflection.h>
+#include <tvm/runtime/object.h>
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+class CascaderGraph;
+class MemoryRegion;
+class Part;
+class Tensor;
+class StripeConfig;
+class Plan;
+class CascaderOptions;
+
+using HomeMap =
+    std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>;
+
+/*!
+ * \brief Generate possible output StripeConfigs that could be applied to a Part's output.
+ * \param part The Part to generate StripeConfigs for.
+ * \param stripe_factors How many striping factors to try per axis.
+ * \return The generated StripeConfigs for the Part's output.
+ */
+std::vector<StripeConfig> GenerateOutputStripeConfigs(const Part& part, int stripe_factors);
+
+/*!
+ * \brief Generate single-Part Plans for a Part for a given list of output StripeConfigs.
+ * \param part The Part to generate Plans for.
+ * \param output_stripe_configs The output StripeConfigs to generate Plans with.
+ * \param home_map The Tensor homing map defining valid memory homes for Tensors.
+ * \param options The configuration options with which to run the generator.
+ * \return The generated Plans covering the Part.
+ * \note For each of the output StripeConfigs provided, this algorithm will produce a number
+ * of Plans corresponding to different choices of Tensor homing/copying, buffer modes
+ * and INTERIOR/BOUNDARY states. For each of these variants, the Part's performance will
+ * be queried and the memory usage will be calculated.
+ */
+std::vector<Plan> GenerateSinglePlans(const Part& part,
+                                      const std::vector<StripeConfig>& output_stripe_configs,
+                                      const HomeMap& home_map, const CascaderOptions& options);
+
+/*!
+ * \brief Generate pareto optimal Plans for a Graph.
+ * \param graph The Graph to generate Plans for.
+ * \param home_map The Tensor homing map defining valid memory homes for Tensors.
+ * \param options The configuration options with which to run the generator.
+ * \return A map between Part groups and a list of pareto optimal Plans which cover that group.
+ * \note This algorithm does the following:
+ *
+ * Iterate Part-by-Part in a reversed topological ordering (starting at the output Parts and
+ * working towards the input Parts).
+ *
+ * For each Part:
+ *  1. Determine the possible StripeConfigs we might want to use to stripe the Part using
+ *     GenerateOutputStripeConfigs.
+ *  2. Additionally, collect all the StripeConfigs of open Plans that could connect to this
+ *     Part (i.e. the Plan has an open TensorConfig for the Part's output Tensor).
+ *  3. Use these two lists of StripeConfigs to produce single Part Plans with GenerateSinglePlans.
+ *  4. For the generated Plans that have an open output TensorConfig, try and merge these into
+ *     existing Plans which share an open input TensorConfig.
+ *  5. All Plans are then indexed by both the Part group they cover and their open TensorConfigs.
+ *  6. Plans which cover the same Part group and share the same open TensorConfigs are culled
+ *     using ParetoCullPlans.
+ *
+ * Once every Part has been visited, return the Plans with no open TensorConfigs indexed by Part
+ * group.
+ */
+std::unordered_map<std::vector<Part>, std::vector<Plan>> GenerateGraphPlans(
+    const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions& options);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_PLAN_GENERATOR_H_
diff --git a/src/contrib/ethosu/cascader/tensor_config.cc b/src/contrib/ethosu/cascader/tensor_config.cc
new file mode 100644
index 000000000000..5e60f522fe4e
--- /dev/null
+++ b/src/contrib/ethosu/cascader/tensor_config.cc
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "tensor_config.h"
+
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+void MemoryRegionNode::VisitAttrs(AttrVisitor* v) {
+  v->Visit("name", &name);
+  v->Visit("size", &size);
+  v->Visit("read_bandwidth", &read_bandwidth);
+  v->Visit("write_bandwidth", &write_bandwidth);
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.MemoryRegion")
+    .set_body_typed([](String name, int size, int read_bandwidth, int write_bandwidth) {
+      return MemoryRegion(name, size, read_bandwidth, write_bandwidth);
+    });
+
+TVM_REGISTER_NODE_TYPE(MemoryRegionNode);
+
+void TensorConfigNode::VisitAttrs(AttrVisitor* v) {
+  v->Visit("_tensor", &tensor_);
+  v->Visit("_home_region", &home_region_);
+  int state = static_cast<int>(state_);
+  v->Visit("_state", &state);
+  int buffer_mode = static_cast<int>(buffer_mode_);
+  v->Visit("_buffer_mode", &buffer_mode);
+  Array<StripeConfig> tmp_arr(stripe_configs_);
+  v->Visit("_stripe_configs", &tmp_arr);
+  v->Visit("_copy_tensor", &copy_tensor_);
+  v->Visit("_copy_region", &copy_region_);
+  int64_t tmp_hash = static_cast<int64_t>(hash_);
+  v->Visit("_hash", &tmp_hash);
+}
+
+int TensorConfigNode::GetBufferSize() const {
+  if (buffer_mode_ == BufferMode::RECOMPUTE) {
+    return GetRecomputeBufferSize_();
+  } else {
+    return GetRollingBufferSize_();
+  }
+}
+
+void TensorConfigNode::ComputeHash_() {
+  hash_ = ObjectHash()(tensor_);
+  hash_combine(&hash_, std::hash<std::string>()(home_region_->name));
+  hash_combine(&hash_, std::hash<int>()(static_cast<int>(state_)));
+  hash_combine(&hash_, std::hash<int>()(static_cast<int>(buffer_mode_)));
+  hash_combine(&hash_, hash_vector(stripe_configs_));
+  hash_combine(&hash_, std::hash<bool>()(copy_tensor_));
+  hash_combine(&hash_, std::hash<std::string>()(copy_region_->name));
+}
+
+int TensorConfigNode::GetRecomputeBufferSize_() const {
+  size_t buffer_size = 0;
+  for (const auto& stripe_config : stripe_configs_) {
+    buffer_size += mul_reduce(stripe_config->GetShape());
+  }
+  return buffer_size * tensor_->GetDataType().bytes() * tensor_->GetCompressionRatio();
+}
+
+int TensorConfigNode::GetRollingBufferSize_() const {
+  int buffer_size = 0;
+  for (const auto& stripe_config : stripe_configs_) {
+    int rolling_axis = -1;
+    for (size_t i = 0; i < stripe_config->GetOrder().size(); i++) {
+      // The axis must be striped (> 1 stripes) and ordered (order != 0)
+      if (stripe_config->GetStripes()[i] > 1 && stripe_config->GetOrder()[i] != 0) {
+        // If we've yet to find a possible rolling axis, use this one
+        if (rolling_axis == -1) {
+          rolling_axis = i;
+          continue;
+        }
+        // Otherwise, replace the rolling axis if the current axis has an earlier order
+        if (stripe_config->GetOrder()[i] < stripe_config->GetOrder()[rolling_axis]) {
+          rolling_axis = i;
+        }
+      }
+    }
+    // If we didn't find a rolling axis, just use axis 0
+    if (rolling_axis == -1) {
+      rolling_axis = 0;
+    }
+    int rolling_size = 1;
+    for (size_t i = 0; i < tensor_->GetShape().size(); i++) {
+      if (static_cast<int>(i) == rolling_axis) {
+        rolling_size *= stripe_config->GetShape()[i];
+      } else {
+        rolling_size *= tensor_->GetShape()[i];
+      }
+    }
+    buffer_size += rolling_size;
+  }
+  return buffer_size * tensor_->GetDataType().bytes() * tensor_->GetCompressionRatio();
+}
+
+TensorConfig::TensorConfig(const Tensor& tensor, const MemoryRegion& home_region,
+                           TensorConfigState state, BufferMode buffer_mode,
+                           const std::vector<StripeConfig>& stripe_configs, bool copy_tensor,
+                           const MemoryRegion& copy_region) {
+  auto n = make_object<TensorConfigNode>();
+  n->tensor_ = std::move(tensor);
+  n->home_region_ = std::move(home_region);
+  n->state_ = state;
+  n->buffer_mode_ = buffer_mode;
+  n->stripe_configs_ = std::move(stripe_configs);
+  n->copy_tensor_ = copy_tensor;
+  n->copy_region_ = std::move(copy_region);
+  n->ComputeHash_();
+  data_ = std::move(n);
+}
+
+inline bool TensorConfig::operator==(const TensorConfig& other) const {
+  if (get() == other.get()) return true;
+  if (get() == nullptr || other.get() == nullptr) return false;
+  if ((*this)->tensor_ == other->tensor_ && (*this)->home_region_ == other->home_region_ &&
+      (*this)->state_ == other->state_ && (*this)->buffer_mode_ == other->buffer_mode_ &&
+      (*this)->stripe_configs_ == other->stripe_configs_ &&
+      (*this)->copy_tensor_ == other->copy_tensor_ &&
+      (*this)->copy_region_ == other->copy_region_) {
+    return true;
+  }
+  return false;
+}
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfig")
+    .set_body_typed([](Tensor tensor, MemoryRegion home_region, int state, int buffer_mode,
+                       Array<StripeConfig> stripe_configs, bool copy_tensor,
+                       MemoryRegion copy_region) {
+      TensorConfigState estate = static_cast<TensorConfigState>(state);
+      BufferMode ebuffer_mode = static_cast<BufferMode>(buffer_mode);
+      std::vector<StripeConfig> vstripe_configs(stripe_configs.begin(), stripe_configs.end());
+      return TensorConfig(tensor, home_region, estate, ebuffer_mode, vstripe_configs, copy_tensor,
+                          copy_region);
+    });
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfigEqual")
+    .set_body_method(&TensorConfig::operator==);
+
+TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.TensorConfigGetBufferSize")
+    .set_body_method<TensorConfig>(&TensorConfigNode::GetBufferSize);
+
+TVM_REGISTER_NODE_TYPE(TensorConfigNode);
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
diff --git a/src/contrib/ethosu/cascader/tensor_config.h b/src/contrib/ethosu/cascader/tensor_config.h
new file mode 100644
index 000000000000..6a37f76ce085
--- /dev/null
+++ b/src/contrib/ethosu/cascader/tensor_config.h
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/contrib/ethosu/cascader/tensor_config.h
+ * \brief TensorConfig object for the NPU cascader
+ */
+#ifndef TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_
+#define TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_
+
+#include <tvm/node/reflection.h>
+#include <tvm/runtime/object.h>
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "graph.h"
+#include "stripe_config.h"
+
+namespace tvm {
+namespace contrib {
+namespace ethosu {
+namespace cascader {
+
+class MemoryRegionNode : public Object {
+ public:
+  void VisitAttrs(AttrVisitor* v);
+
+  /*! \brief The name of the region */
+  std::string name;
+  /*! \brief The size of the region */
+  int size;
+  /*! \brief The read bandwidth of the region in bytes per cycle */
+  int read_bandwidth;
+  /*! \brief The write bandwidth of the region in bytes per cycle */
+  int write_bandwidth;
+
+  static constexpr const char* _type_key = "contrib.ethosu.cascader.MemoryRegion";
+  TVM_DECLARE_FINAL_OBJECT_INFO(MemoryRegionNode, Object)
+};
+
+class MemoryRegion : public ObjectRef {
+ public:
+  MemoryRegion(std::string name, int size, int read_bandwidth, int write_bandwidth) {
+    auto n = make_object<MemoryRegionNode>();
+    n->name = name;
+    n->size = size;
+    n->read_bandwidth = read_bandwidth;
+    n->write_bandwidth = write_bandwidth;
+    data_ = std::move(n);
+  }
+
+  TVM_DEFINE_OBJECT_REF_METHODS(MemoryRegion, ObjectRef, MemoryRegionNode);
+};
+
+/*! \brief The 'state' of a TensorConfig as used in the Plan generation algorithm.
+ * BOUNDARY - Should describe a Plan input/output Tensor.
+ * INTERIOR - Should describe an intermediate Tensor in a 'closed' Plan.
+ */
+enum TensorConfigState { BOUNDARY, INTERIOR };
+
+/*! \brief Node to represent a TensorConfig */
+class TensorConfigNode : public Object {
+ public:
+  void VisitAttrs(AttrVisitor* v);
+
+  /*! \return The Tensor the config applies to */
+  const Tensor GetTensor() const { return tensor_; }
+  /*! \return The region where the tensor is allocated */
+  MemoryRegion GetHomeRegion() const { return home_region_; }
+  /*!
+   * \return The state of the TensorConfig.
+   * \note The TensorConfigState is only used as part of the Plan generation algorithm. For a Plan
+   * to be 'closed' (and therefore not subject to any further merging), all the TensorConfigs that
+   * describe Plan input or output Tensors must be in the 'BOUNDARY' state with the rest being
+   * 'INTERIOR'. If any of the input or output tensors are described by an 'INTERIOR' TensorConfig,
+   * then the Plan is 'open' and should be merged with other 'open' Plans until the result becomes
+   * 'closed'.
+   */
+  TensorConfigState GetState() const { return state_; }
+  /*!
+   * \return The mode in which the buffer should be realized
+   * \note There are multiple buffering strategies by which a tensor may be realized (computed).
+   * These affect the amount of recomputation necessary as well as the size of buffer required to
+   * store the tensor. See 'BufferMode' for a description of the allowable buffering modes.
+   */
+  BufferMode GetBufferMode() const { return buffer_mode_; }
+  /*!
+   * \return Whether to copy the tensor.
+   * \note While a tensor will originally reside in its home region, the TensorConfig may optionally
+   * specify that the tensor should be copied (according to the StripeConfigs) into another
+   * MemoryRegion. As an example for where this may be used, if a weights tensor initially resides
+   * in slow Flash memory then necessarily the home region will be Flash. However, if the weights
+   * values are used multiple times by a Part, it may be more performant to choose to copy the
+   * weights into a faster memory like SRAM.
+   */
+  bool DoCopy() const { return copy_tensor_; }
+  /*! \return The region to copy the tensor to */
+  MemoryRegion GetCopyRegion() const {
+    if (!copy_tensor_) {
+      return home_region_;
+    }
+    return copy_region_;
+  }
+  /*!
+   * \return The StripeConfigs with which to compute the tensor.
+   * \note The StripeConfigs determine the order in which the elements of the tensor should be
+   * computed, including potentially computing them multiple times (recompute). Multiple
+   * StripeConfigs are used over just a single StripeConfig for the case where the tensor is
+   * consumed by two different Parts executing themselves with different StripeConfigs. In this
+   * case, there is a StripeConfig per consumer of the tensor.
+   */
+  const std::vector<StripeConfig> GetStripeConfigs() const { return stripe_configs_; }
+  /*!
+   * \return The size of the buffer needed for the TensorConfig.
+   * \note The size of buffer necessary to store a tensor being produced using the TensorConfig is
+   * not necessarily just the size of the tensor. In Plans, a tensor may be being produced and
+   * consumed in 'stripes' which are smaller than the full tensor. Therefore, the buffer necessary
+   * to store the tensor may only need to be as large as the stripe. The precise size of the buffer
+   * will depend both on the BufferMode and StripeConfigs (as well as, of course, the Tensor).
+   */
+  int GetBufferSize() const;
+  /*! \return The hash of the TensorConfigNode */
+  size_t GetHash() const { return hash_; }
+
+  static constexpr const char* _type_key = "contrib.ethosu.cascader.TensorConfig";
+  TVM_DECLARE_FINAL_OBJECT_INFO(TensorConfigNode, Object);
+
+ protected:
+  friend class TensorConfig;
+
+  /*! \brief Compute the hash of the TensorConfigNode */
+  void ComputeHash_();
+
+  /*! \return The size of the recompute buffer needed*/
+  int GetRecomputeBufferSize_() const;
+  /*! \return The size of the rolling buffer needed*/
+  int GetRollingBufferSize_() const;
+
+  /*! \brief The Tensor the config applies to */
+  Tensor tensor_;
+  /*! \brief The region where the tensor is allocated */
+  MemoryRegion home_region_;
+  /*! \return The state of the TensorConfig */
+  TensorConfigState state_;
+  /*! \brief The mode in which the buffer should be realized */
+  BufferMode buffer_mode_;
+  /*! \return The StripeConfigs with which to compute the tensor */
+  std::vector<StripeConfig> stripe_configs_;
+  /*! \brief Whether to copy the tensor */
+  bool copy_tensor_;
+  /*! \brief The region to copy the tensor to */
+  MemoryRegion copy_region_;
+  /*! \brief The hash of the TensorConfigNode */
+  size_t hash_{0};
+};
+
+/*!
+ * \brief A class which describes how to realize a Tensor.
+ * \note The TensorConfig describes both how a Tensor is scheduled (the order in which it's
+ * produced/consumed) and how its allocated in memory (which region it should reside in and whether
+ * it should be copied). For further detail on how TensorConfig stores this information, consult the
+ * documentation of TensorConfigNode.
+ */
+class TensorConfig : public ObjectRef {
+ public:
+  TensorConfig(const Tensor& tensor, const MemoryRegion& home_region, TensorConfigState state,
+               BufferMode buffer_mode, const std::vector<StripeConfig>& stripe_configs,
+               bool copy_tensor, const MemoryRegion& copy_region);
+  /*!
+   * \brief Check if two TensorConfigs are equal to each other.
+   * \param other TensorConfig to be checked.
+   * \return Whether the two TensorConfigs equal each other.
+   */
+  bool operator==(const TensorConfig& other) const;
+
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(TensorConfig, ObjectRef, TensorConfigNode);
+};
+
+}  // namespace cascader
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tvm
+
+// Hash and equal function for TensorConfig
+namespace std {
+
+/*! \brief The equal_to function for tvm::contrib::ethosu::cascader::TensorConfig */
+template <>
+struct equal_to<::tvm::contrib::ethosu::cascader::TensorConfig> {
+  bool operator()(const ::tvm::contrib::ethosu::cascader::TensorConfig& lhs,
+                  const ::tvm::contrib::ethosu::cascader::TensorConfig& rhs) const {
+    return lhs == rhs;
+  }
+};
+
+/*! \brief The hash function for tvm::contrib::ethosu::cascader::TensorConfig */
+template <>
+struct hash<::tvm::contrib::ethosu::cascader::TensorConfig> {
+  std::size_t operator()(
+      const ::tvm::contrib::ethosu::cascader::TensorConfig& tensor_config) const {
+    return tensor_config->GetHash();
+  }
+};
+
+}  // namespace std
+
+#endif  // TVM_CONTRIB_ETHOSU_CASCADER_TENSOR_CONFIG_H_
diff --git a/tests/python/contrib/test_ethosu/cascader/conftest.py b/tests/python/contrib/test_ethosu/cascader/conftest.py
index eacf57c251a8..21ed401994c1 100644
--- a/tests/python/contrib/test_ethosu/cascader/conftest.py
+++ b/tests/python/contrib/test_ethosu/cascader/conftest.py
@@ -22,6 +22,23 @@
 except ImportError:
     ethosu_enabled = False
 
+import tvm.contrib.ethosu.cascader as cs
+
+
+@pytest.fixture
+def FLASH():
+    return cs.MemoryRegion(name="FLASH", size=10 ** 7, read_bandwidth=4, write_bandwidth=4)
+
+
+@pytest.fixture
+def DRAM():
+    return cs.MemoryRegion(name="DRAM", size=10 ** 9, read_bandwidth=8, write_bandwidth=8)
+
+
+@pytest.fixture
+def SRAM():
+    return cs.MemoryRegion(name="SRAM", size=10 ** 6, read_bandwidth=16, write_bandwidth=16)
+
 
 if ethosu_enabled:
     import tvm
@@ -35,6 +52,52 @@
         make_ethosu_binary_elementwise,
     )
 
+    def make_TwoConv2DTE():
+        def _get_func():
+            ifm = relay.var("ifm", shape=(1, 12, 12, 8), dtype="int8")
+            conv1 = make_ethosu_conv2d(
+                ifm=ifm,
+                ifm_channels=8,
+                ofm_channels=32,
+                kernel_shape=(1, 1),
+                padding=(0, 0),
+                strides=(1, 1),
+                dilation=(1, 1),
+                activation="NONE",
+                ifm_layout="NHWC",
+                ofm_layout="NHCWB16",
+            )
+            conv2 = make_ethosu_conv2d(
+                ifm=conv1,
+                ifm_channels=32,
+                ofm_channels=16,
+                kernel_shape=(3, 3),
+                padding=(1, 1),
+                strides=(1, 1),
+                dilation=(1, 1),
+                activation="NONE",
+                ifm_layout="NHCWB16",
+                ofm_layout="NHWC",
+            )
+            func = relay.Function(relay.analysis.free_vars(conv2), conv2)
+            func = run_opt_pass(func, relay.transform.InferType())
+            return func
+
+        func = _get_func()
+        te_graph, const_dict = create_te_graph(func)
+        sch = tvm.te.create_schedule([t.op for t in te_graph.outputs])
+        return sch, te_graph, const_dict
+
+    @pytest.fixture
+    def TwoConv2DTE():
+        return make_TwoConv2DTE()
+
+    @pytest.fixture
+    def TwoConv2DGraph():
+        _, te_graph, const_dict = make_TwoConv2DTE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
+
     def make_TwoConv2DWithSliceTE():
         def _get_func():
             ifm = relay.var("ifm", shape=(1, 12, 12, 8), dtype="int8")
@@ -76,6 +139,12 @@ def _get_func():
     def TwoConv2DWithSliceTE():
         return make_TwoConv2DWithSliceTE()
 
+    @pytest.fixture
+    def TwoConv2DWithSliceGraph():
+        _, te_graph, const_dict = make_TwoConv2DWithSliceTE()
+        device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+        return cs.create_cascader_graph(te_graph, const_dict, device_config)
+
     def make_MobileNetv2DiamondTE():
         def _get_func():
             ifm = relay.var("ifm", shape=(1, 56, 56, 96), dtype="int8")
diff --git a/tests/python/contrib/test_ethosu/cascader/infra.py b/tests/python/contrib/test_ethosu/cascader/infra.py
index 5f41dce30147..aa681c41f210 100644
--- a/tests/python/contrib/test_ethosu/cascader/infra.py
+++ b/tests/python/contrib/test_ethosu/cascader/infra.py
@@ -14,113 +14,147 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+ethosu_enabled = True
+try:
+    import ethosu.vela
+except ImportError:
+    ethosu_enabled = False
+
 import tvm
 from tvm import relay
-from tvm.relay.backend.contrib.ethosu.tir.compiler import extract_constants, lower_to_te
-
+import tvm.contrib.ethosu.cascader as cs
 import numpy as np
 
 
-def create_te_graph(func):
-    func, consts = extract_constants(func)
-    mod = tvm.IRModule.from_expr(func)
-    func = relay.transform.InferType()(mod)["main"]
-    te_graph = lower_to_te(func)
-    return te_graph, consts
+def make_options(
+    cascade_region: cs.MemoryRegion,
+    max_proposals: int = 1,
+    stripe_factors: int = 1,
+    max_plan_size: int = 1,
+    always_copy_size: int = 1024,
+):
+    return cs.CascaderOptions(
+        cascade_region=cascade_region,
+        max_proposals=max_proposals,
+        stripe_factors=stripe_factors,
+        max_plan_size=max_plan_size,
+        always_copy_size=always_copy_size,
+    )
 
 
-def make_matrices(
-    op_type, kernel, stride, padding, ifm_layout, ofm_layout, dilation=(1, 1), ifm_channels=1
-):
-    kernel_h, kernel_w = kernel
-    stride_h, stride_w = stride
-    dilation_h, dilation_w = dilation
-    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
-    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
-    if op_type == "ethosu_conv2d":
-        ifm_matrix = [
-            [1, 0, 0, 0, 0],
-            [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
-            [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
-            [0, 0, 0, 0, ifm_channels],
-            [0, 0, 0, 0, 1],
-        ]
-        weight_matrix = [
-            [0, 0, 0, 1, 0],
-            [0, 0, 0, 0, kernel_h],
-            [0, 0, 0, 0, kernel_w],
-            [0, 0, 0, 0, ifm_channels],
-            [0, 0, 0, 0, 1],
-        ]
-    elif op_type == "ethosu_depthwise_conv2d":
-        ifm_matrix = [
+def make_simple_home_map(graph, var_region, const_region):
+    home_map = {}
+    for tensor in graph.tensor_order:
+        if tensor.is_constant:
+            home_map[tensor] = [const_region]
+        else:
+            home_map[tensor] = [var_region]
+
+    return home_map
+
+
+if ethosu_enabled:
+    from tvm.relay.backend.contrib.ethosu.tir.compiler import extract_constants, lower_to_te
+
+    def create_te_graph(func):
+        func, consts = extract_constants(func)
+        mod = tvm.IRModule.from_expr(func)
+        func = relay.transform.InferType()(mod)["main"]
+        te_graph = lower_to_te(func)
+        return te_graph, consts
+
+    def make_matrices(
+        op_type, kernel, stride, padding, ifm_layout, ofm_layout, dilation=(1, 1), ifm_channels=1
+    ):
+        kernel_h, kernel_w = kernel
+        stride_h, stride_w = stride
+        dilation_h, dilation_w = dilation
+        dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
+        dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
+        nhwc_to_nhcwb16 = [
             [1, 0, 0, 0, 0],
-            [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
-            [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
-            [0, 0, 0, 1, 0],
+            [0, 1, 0, 0, 0],
+            [0, 0, 0, 1 / 16, 0],
+            [0, 0, 1, 0, 0],
+            [0, 0, 0, 0, 16],
             [0, 0, 0, 0, 1],
         ]
-        weight_matrix = [
-            [0, 0, 0, 1, 0],
-            [0, 0, 0, 0, kernel_h],
-            [0, 0, 0, 0, kernel_w],
-            [0, 0, 0, 0, 1],
-            [0, 0, 0, 0, 1],
+        nhcwb16_to_nhwc = [
+            [1, 0, 0, 0, 0, 0],
+            [0, 1, 0, 0, 0, 0],
+            [0, 0, 0, 1, 0, 0],
+            [0, 0, 16, 0, 1, -16],
+            [0, 0, 0, 0, 0, 1],
         ]
-    elif op_type == "ethosu_pooling":
-        ifm_matrix = [
-            [1, 0, 0, 0, 0],
-            [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
-            [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
+        if op_type == "ethosu_conv2d":
+            ifm_matrix = [
+                [1, 0, 0, 0, 0],
+                [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
+                [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
+                [0, 0, 0, 0, ifm_channels],
+                [0, 0, 0, 0, 1],
+            ]
+            weight_matrix = [
+                [0, 0, 0, 1, 0],
+                [0, 0, 0, 0, kernel_h],
+                [0, 0, 0, 0, kernel_w],
+                [0, 0, 0, 0, ifm_channels],
+                [0, 0, 0, 0, 1],
+            ]
+        elif op_type == "ethosu_depthwise_conv2d":
+            ifm_matrix = [
+                [1, 0, 0, 0, 0],
+                [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
+                [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
+                [0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 1],
+            ]
+            weight_matrix = [
+                [0, 0, 0, 1, 0],
+                [0, 0, 0, 0, kernel_h],
+                [0, 0, 0, 0, kernel_w],
+                [0, 0, 0, 0, 1],
+                [0, 0, 0, 0, 1],
+            ]
+        elif op_type == "ethosu_pooling":
+            ifm_matrix = [
+                [1, 0, 0, 0, 0],
+                [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
+                [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
+                [0, 0, 0, 1, 0],
+                [0, 0, 0, 0, 1],
+            ]
+            weight_matrix = [
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0],
+            ]
+        scale_bias_matrix = [
             [0, 0, 0, 1, 0],
+            [0, 0, 0, 0, 10],
             [0, 0, 0, 0, 1],
         ]
-        weight_matrix = [
-            [0, 0, 0, 0, 0],
-            [0, 0, 0, 0, 0],
-            [0, 0, 0, 0, 0],
-            [0, 0, 0, 0, 0],
-            [0, 0, 0, 0, 0],
-        ]
-    scale_bias_matrix = [
-        [0, 0, 0, 1, 0],
-        [0, 0, 0, 0, 10],
-        [0, 0, 0, 0, 1],
-    ]
-    if ofm_layout == "NHCWB16":
-        ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
-        weight_matrix = np.matmul(weight_matrix, nhcwb16_to_nhwc).tolist()
-        scale_bias_matrix = np.matmul(scale_bias_matrix, nhcwb16_to_nhwc).tolist()
-    if ifm_layout == "NHCWB16":
-        ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()
+        if ofm_layout == "NHCWB16":
+            ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
+            weight_matrix = np.matmul(weight_matrix, nhcwb16_to_nhwc).tolist()
+            scale_bias_matrix = np.matmul(scale_bias_matrix, nhcwb16_to_nhwc).tolist()
+        if ifm_layout == "NHCWB16":
+            ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()
 
-    ifm_offset = (
-        [0, -padding[0], -padding[1], 0]
-        if ifm_layout == "NHWC"
-        else [0, -padding[0], 0, -padding[1], 0]
-    )
-    weight_offset = [0, 0, 0, 0]
-    scale_bias_offset = [0, 0]
-    return (
-        ifm_matrix,
-        ifm_offset,
-        weight_matrix,
-        weight_offset,
-        scale_bias_matrix,
-        scale_bias_offset,
-    )
+        ifm_offset = (
+            [0, -padding[0], -padding[1], 0]
+            if ifm_layout == "NHWC"
+            else [0, -padding[0], 0, -padding[1], 0]
+        )
+        weight_offset = [0, 0, 0, 0]
+        scale_bias_offset = [0, 0]
+        return (
+            ifm_matrix,
+            ifm_offset,
+            weight_matrix,
+            weight_offset,
+            scale_bias_matrix,
+            scale_bias_offset,
+        )
diff --git a/tests/python/contrib/test_ethosu/cascader/test_pareto.py b/tests/python/contrib/test_ethosu/cascader/test_pareto.py
new file mode 100644
index 000000000000..2d897a79310f
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_pareto.py
@@ -0,0 +1,149 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from tvm.tir import IntImm
+from tvm.contrib.ethosu.cascader.pareto import (
+    _get_pareto_frontier,
+    _thin_vector,
+    _pareto_cull_plans,
+)
+from tvm.contrib.ethosu.cascader import (
+    Plan,
+    StripeConfig,
+    TensorConfig,
+    TensorConfigState,
+    BufferMode,
+    Tensor,
+)
+
+import pytest
+import numpy as np
+
+
+def _ref_get_pareto_frontier(costs):
+    is_efficient = np.ones(costs.shape[0], dtype=bool)
+    for i, c in enumerate(costs):
+        if is_efficient[i]:
+            is_efficient[is_efficient] = np.any(
+                costs[is_efficient] < c, axis=1
+            )  # Keep any point with a lower cost
+            is_efficient[i] = True  # And keep self
+    return is_efficient
+
+
+def _ref_thin_vector(vec, max_size):
+    if max_size < 1:
+        return []
+    if len(vec) <= max_size or len(vec) == 0:
+        return vec
+    if max_size == 1:
+        return [vec[0]]
+    samples = np.linspace(0, len(vec), max_size - 1, endpoint=False).astype(int)
+    samples = np.append(samples, len(vec) - 1)
+    return vec[samples]
+
+
+def _ref_pareto_cull_plans(plans, points):
+    if len(plans) <= points:
+        return plans
+    plans = np.array(sorted(plans, key=lambda x: x.memory_usage))
+    costs = []
+    for plan in plans:
+        costs.append(np.array([plan.memory_usage, plan.cycles]))
+    is_efficient = _ref_get_pareto_frontier(np.array(costs))
+    culled_plans = plans[is_efficient]
+    thinned_plans = (
+        culled_plans
+        if len(culled_plans) <= points
+        else _ref_thin_vector(np.array(culled_plans), points)
+    )
+    return thinned_plans
+
+
+@pytest.mark.parametrize("num_costs", [1, 10, 30, 100, 300, 1000])
+def test_get_pareto_frontier(num_costs):
+    cost_low = 1
+    cost_high = 100
+    dims = 2
+    costs = []
+    for i in range(num_costs):
+        costs.append(list(np.random.randint(cost_low, cost_high, size=(dims,))))
+    reference = list(_ref_get_pareto_frontier(np.array(costs)))
+    result = _get_pareto_frontier(costs)
+    assert result == reference
+
+
+@pytest.mark.parametrize("vec_length", [0, 1, 10, 25, 100])
+@pytest.mark.parametrize("max_size", [0, 1, 2, 5, 11, 51])
+def test_thin_vector(vec_length, max_size):
+    def _make_vector(length):
+        vector = []
+        for i in range(length):
+            obj = IntImm("int32", i)
+            vector.append(obj)
+
+        return vector
+
+    vector = _make_vector(vec_length)
+    reference = list(_ref_thin_vector(np.array(vector), max_size))
+    result = _thin_vector(vector, max_size)
+    assert result == reference
+
+
+@pytest.mark.parametrize("num_plans", [0, 1, 10, 25, 100])
+@pytest.mark.parametrize("max_plans", [0, 1, 2, 5, 11, 51])
+def test_pareto_cull_plans(num_plans, max_plans, SRAM):
+    memory_usage_low = 1
+    memory_usage_high = 1000
+    cycles_low = 100
+    cycles_high = 10000
+
+    def _make_plan(memory_usage, cycles):
+        output_config = TensorConfig(
+            tensor=Tensor([1], "int8"),
+            home_region=SRAM,
+            state=TensorConfigState.BOUNDARY,
+            buffer_mode=BufferMode.RECOMPUTE,
+            stripe_configs=[StripeConfig([1], [1], [1], [1], [1], [0])],
+        )
+        return Plan(
+            tensor_configs={},
+            open_configs=[],
+            output_config=output_config,
+            part_group=[],
+            interior_region=SRAM,
+            memory_usage=memory_usage,
+            cycles=cycles,
+        )
+
+    def _make_plans(num):
+        plans = []
+        for _ in range(num):
+            memory_usage = np.random.randint(memory_usage_low, memory_usage_high)
+            cycles = np.random.randint(cycles_low, cycles_high)
+            plan = _make_plan(memory_usage, cycles)
+            plans.append(plan)
+
+        return plans
+
+    plans = _make_plans(num_plans)
+    reference = list(_ref_pareto_cull_plans(plans, max_plans))
+    result = _pareto_cull_plans(plans, max_plans)
+    assert result == reference
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/tests/python/contrib/test_ethosu/cascader/test_plan.py b/tests/python/contrib/test_ethosu/cascader/test_plan.py
new file mode 100644
index 000000000000..ddc40b49ac8a
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_plan.py
@@ -0,0 +1,244 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import tvm.contrib.ethosu.cascader as cs
+
+import pytest
+
+
+def test_plan(DRAM, SRAM):
+    subgraph = cs.TESubgraph([], None)
+    part = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+        ],
+    )
+    tensor_1 = cs.Tensor([10, 10], "uint8")
+    tensor_2 = cs.Tensor([10, 10], "uint8")
+
+    part.set_input(0, tensor_1)
+    part.set_output(tensor_2)
+    tensor_1.add_consumer(part)
+    tensor_2.add_producer(part)
+
+    output_stripe_config = cs.StripeConfig(
+        shape=[5, 5],
+        extent=[10, 10],
+        strides=[5, 5],
+        order=[1, 2],
+        stripes=[2, 2],
+        offset=[0, 0],
+    )
+    tensor_config_out = cs.TensorConfig(
+        tensor=tensor_2,
+        home_region=DRAM,
+        state=cs.TensorConfigState.BOUNDARY,
+        buffer_mode=cs.BufferMode.RECOMPUTE,
+        stripe_configs=[output_stripe_config],
+        copy_tensor=False,
+    )
+    input_stripe_config = part.calculate_input_stripe_configs(output_stripe_config)[0]
+    tensor_config_in = cs.TensorConfig(
+        tensor=tensor_1,
+        home_region=DRAM,
+        state=cs.TensorConfigState.INTERIOR,
+        buffer_mode=cs.BufferMode.ROLLING,
+        stripe_configs=[input_stripe_config],
+        copy_tensor=False,
+    )
+    tensor_configs = {tensor_1: tensor_config_in, tensor_2: tensor_config_out}
+    open_configs = frozenset([tensor_config_in])
+    part_group = frozenset([part])
+    interior_region = SRAM
+    memory_usage = 100
+    cycles = 20
+    plan = cs.Plan(
+        tensor_configs=tensor_configs,
+        open_configs=open_configs,
+        output_config=tensor_config_out,
+        part_group=part_group,
+        interior_region=interior_region,
+        memory_usage=memory_usage,
+        cycles=cycles,
+    )
+
+    assert plan.tensor_configs == tensor_configs
+    assert plan.open_configs == open_configs
+    assert plan.output_config == tensor_config_out
+    assert plan.part_group == part_group
+    assert plan.interior_region == interior_region
+    assert plan.memory_usage == memory_usage
+    assert plan.cycles == cycles
+
+
+def test_plan_merge(DRAM, SRAM):
+    subgraph = cs.TESubgraph([], None)
+    part_1 = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[2, 0, 0], [0, 2, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+        ],
+    )
+    part_2 = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+            cs.Propagator(
+                [[0, 0, 6], [0, 0, 6], [0, 0, 1]],
+                [0, 0],
+            ),
+            cs.Propagator(
+                [[1, 0], [0, 1]],
+                [0],
+            ),
+        ],
+    )
+    tensor_1 = cs.Tensor([20, 20], "uint8")
+    tensor_2 = cs.Tensor([10, 10], "uint8")
+    tensor_3 = cs.Tensor([6, 6], "uint8")
+    tensor_4 = cs.Tensor([10], "uint8")
+    tensor_5 = cs.Tensor([10, 10], "uint8")
+
+    part_1.set_input(0, tensor_1)
+    part_1.set_output(tensor_2)
+    tensor_1.add_consumer(part_1)
+    tensor_2.add_producer(part_1)
+
+    part_2.set_input(0, tensor_2)
+    part_2.set_input(1, tensor_3)
+    part_2.set_input(2, tensor_4)
+    part_2.set_output(tensor_5)
+    tensor_2.add_consumer(part_2)
+    tensor_3.add_consumer(part_2)
+    tensor_4.add_consumer(part_2)
+    tensor_5.add_producer(part_2)
+
+    output_stripe_config = cs.StripeConfig(
+        shape=[5, 5],
+        extent=[10, 10],
+        strides=[5, 5],
+        order=[1, 2],
+        stripes=[2, 2],
+        offset=[0, 0],
+    )
+    tensor_config_5 = cs.TensorConfig(
+        tensor=tensor_5,
+        home_region=DRAM,
+        state=cs.TensorConfigState.BOUNDARY,
+        buffer_mode=cs.BufferMode.RECOMPUTE,
+        stripe_configs=[output_stripe_config],
+        copy_tensor=False,
+    )
+    input_stripe_configs = part_2.calculate_input_stripe_configs(output_stripe_config)
+    tensor_config_4 = cs.TensorConfig(
+        tensor=tensor_4,
+        home_region=DRAM,
+        state=cs.TensorConfigState.BOUNDARY,
+        buffer_mode=cs.BufferMode.RECOMPUTE,
+        stripe_configs=[input_stripe_configs[2]],
+        copy_tensor=False,
+    )
+    tensor_config_3 = cs.TensorConfig(
+        tensor=tensor_3,
+        home_region=SRAM,
+        state=cs.TensorConfigState.INTERIOR,
+        buffer_mode=cs.BufferMode.RECOMPUTE,
+        stripe_configs=[input_stripe_configs[1]],
+        copy_tensor=False,
+    )
+    tensor_config_2 = cs.TensorConfig(
+        tensor=tensor_2,
+        home_region=SRAM,
+        state=cs.TensorConfigState.INTERIOR,
+        buffer_mode=cs.BufferMode.ROLLING,
+        stripe_configs=[input_stripe_configs[0]],
+        copy_tensor=False,
+    )
+    input_stripe_config = part_1.calculate_input_stripe_configs(input_stripe_configs[0])[0]
+    tensor_config_1 = cs.TensorConfig(
+        tensor=tensor_1,
+        home_region=DRAM,
+        state=cs.TensorConfigState.BOUNDARY,
+        buffer_mode=cs.BufferMode.ROLLING,
+        stripe_configs=[input_stripe_config],
+        copy_tensor=False,
+    )
+    tensor_configs = {tensor_1: tensor_config_1, tensor_2: tensor_config_2}
+    open_configs = frozenset([tensor_config_2])
+    part_group = frozenset([part_1])
+    interior_region = SRAM
+    memory_usage = 100
+    cycles = 20
+    plan_1 = cs.Plan(
+        tensor_configs=tensor_configs,
+        open_configs=open_configs,
+        output_config=tensor_config_2,
+        part_group=part_group,
+        interior_region=interior_region,
+        memory_usage=memory_usage,
+        cycles=cycles,
+    )
+
+    tensor_configs = {
+        tensor_2: tensor_config_2,
+        tensor_3: tensor_config_3,
+        tensor_4: tensor_config_4,
+        tensor_5: tensor_config_5,
+    }
+    open_configs = frozenset([tensor_config_2, tensor_config_3])
+    part_group = frozenset([part_2])
+    interior_region = SRAM
+    memory_usage = 200
+    cycles = 30
+    plan_2 = cs.Plan(
+        tensor_configs=tensor_configs,
+        open_configs=open_configs,
+        output_config=tensor_config_5,
+        part_group=part_group,
+        interior_region=interior_region,
+        memory_usage=memory_usage,
+        cycles=cycles,
+    )
+
+    merged_plan = plan_1.merge(plan_2)
+
+    assert merged_plan.tensor_configs == {
+        tensor_1: tensor_config_1,
+        tensor_2: tensor_config_2,
+        tensor_3: tensor_config_3,
+        tensor_4: tensor_config_4,
+        tensor_5: tensor_config_5,
+    }
+    assert merged_plan.open_configs == frozenset([tensor_config_3])
+    assert merged_plan.output_config == tensor_config_5
+    assert merged_plan.part_group == frozenset([part_1, part_2])
+    assert merged_plan.interior_region == interior_region
+    assert merged_plan.memory_usage == plan_1.memory_usage + plan_2.memory_usage
+    assert merged_plan.cycles == plan_1.cycles + plan_2.cycles
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py b/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py
new file mode 100644
index 000000000000..ffee071f0e95
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_plan_generator.py
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+import tvm.contrib.ethosu.cascader as cs
+from .infra import make_simple_home_map, make_options, ethosu_enabled
+
+from tvm.contrib.ethosu.cascader.plan_generator import (
+    _generate_output_stripe_configs,
+    _generate_single_plans,
+    _generate_graph_plans,
+)
+
+
+def test_generate_output_stripe_configs():
+    stripe_factors = 3
+    expected_configs = 13
+    subgraph = cs.TESubgraph([], None)
+    part_1 = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[2, 0, 0], [0, 2, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+        ],
+    )
+    tensor_1 = cs.Tensor([800, 800], "uint8")
+    tensor_2 = cs.Tensor([400, 400], "uint8")
+
+    part_1.set_input(0, tensor_1)
+    part_1.set_output(tensor_2)
+    tensor_1.add_consumer(part_1)
+    tensor_2.add_producer(part_1)
+
+    assert len(_generate_output_stripe_configs(part_1, stripe_factors)) == expected_configs
+
+
+def test_generate_single_plans(SRAM, DRAM):
+    subgraph = cs.TESubgraph([], None)
+    part_1 = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[2, 0, 0], [0, 2, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+        ],
+    )
+    tensor_1 = cs.Tensor([800, 800], "int8")
+    tensor_2 = cs.Tensor([400, 400], "int8")
+
+    part_1.set_input(0, tensor_1)
+    part_1.set_output(tensor_2)
+    tensor_1.add_consumer(part_1)
+    tensor_2.add_producer(part_1)
+
+    home_map = {
+        tensor_1: [SRAM, DRAM],
+        tensor_2: [SRAM],
+    }
+    options = make_options(cascade_region=SRAM, stripe_factors=1)
+    output_stripe_configs = _generate_output_stripe_configs(part_1, options.stripe_factors)
+    plans = _generate_single_plans(part_1, output_stripe_configs, home_map, options)
+    for plan in plans:
+        assert plan.interior_region == SRAM
+        assert plan.part_group == frozenset([part_1])
+        assert set(plan.tensor_configs.keys()) == set([tensor_1, tensor_2])
+        for open_config in plan.open_configs:
+            assert open_config.state == cs.TensorConfigState.INTERIOR
+
+
+def test_generate_graph_plans(SRAM, DRAM):
+    num_part_groups = 3
+    stripe_factors = 4
+    max_plan_size = 10
+    subgraph = cs.TESubgraph([], None)
+    part_a = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+            cs.Propagator(
+                [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
+                [-1, -1],
+            ),
+        ],
+    )
+    part_b = cs.InlinePart(
+        subgraph,
+        [
+            cs.Propagator(
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [0, 0],
+            ),
+        ],
+    )
+    tensor_1 = cs.Tensor([10, 10], "int8")
+    tensor_2 = cs.Tensor([9, 9], "int8")
+    tensor_3 = cs.Tensor([10, 10], "int8")
+    tensor_4 = cs.Tensor([10, 10], "int8")
+
+    part_a.set_input(0, tensor_1)
+    part_a.set_input(1, tensor_2)
+    part_a.set_output(tensor_3)
+    tensor_1.add_consumer(part_a)
+    tensor_2.add_consumer(part_a)
+    tensor_3.add_producer(part_a)
+    part_b.set_input(0, tensor_3)
+    part_b.set_output(tensor_4)
+    tensor_3.add_consumer(part_b)
+    tensor_4.add_producer(part_b)
+
+    graph = cs.CascaderGraph([tensor_1, tensor_2], [tensor_4])
+    home_map = {
+        tensor_1: [SRAM, DRAM],
+        tensor_2: [SRAM],
+        tensor_3: [SRAM],
+        tensor_4: [SRAM, DRAM],
+    }
+
+    options = make_options(
+        cascade_region=SRAM,
+        stripe_factors=stripe_factors,
+        max_plan_size=max_plan_size,
+    )
+    closed_plans = _generate_graph_plans(graph, home_map, options)
+
+    assert len(closed_plans) == num_part_groups
+
+
+if ethosu_enabled:
+
+    def test_plan_generator_two_conv2d(FLASH, SRAM, TwoConv2DGraph):
+        num_part_groups = 3
+        graph = TwoConv2DGraph
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            stripe_factors=4,
+            max_plan_size=10,
+        )
+
+        closed_plans = _generate_graph_plans(graph, home_map, options)
+
+        assert len(closed_plans) == num_part_groups
+
+    def test_plan_generator_two_conv2d_with_slice(FLASH, SRAM, TwoConv2DWithSliceGraph):
+        num_part_groups = 4  # Note this is not 6 because 'slice' has an opaque Propagator
+        graph = TwoConv2DWithSliceGraph
+        home_map = make_simple_home_map(graph, SRAM, FLASH)
+        options = make_options(
+            cascade_region=SRAM,
+            stripe_factors=4,
+            max_plan_size=10,
+        )
+
+        closed_plans = _generate_graph_plans(graph, home_map, options)
+
+        assert len(closed_plans) == num_part_groups
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py b/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py
new file mode 100644
index 000000000000..68290e667eb0
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_tensor_config.py
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from tvm.contrib.ethosu.cascader import (
+    StripeConfig,
+    Tensor,
+    MemoryRegion,
+    TensorConfig,
+    TensorConfigState,
+    BufferMode,
+)
+
+import pytest
+
+
+def test_tensor_config(DRAM, SRAM):
+    stripe_config = StripeConfig(
+        shape=[1, 2, 3],
+        extent=[2, 3, 4],
+        strides=[3, 4, 5],
+        order=[4, 5, 6],
+        stripes=[5, 6, 7],
+        offset=[6, 7, 8],
+    )
+    tensor = Tensor(
+        shape=[10, 10, 10],
+        dtype="int8",
+    )
+    home_region = DRAM
+    state = TensorConfigState.BOUNDARY
+    buffer_mode = BufferMode.ROLLING
+    copy_tensor = True
+    copy_region = SRAM
+    tensor_config = TensorConfig(
+        tensor=tensor,
+        home_region=home_region,
+        state=state,
+        buffer_mode=buffer_mode,
+        stripe_configs=[stripe_config],
+        copy_tensor=copy_tensor,
+        copy_region=copy_region,
+    )
+
+    assert tensor_config.tensor == tensor
+    assert tensor_config.home_region == home_region
+    assert tensor_config.state == state
+    assert tensor_config.buffer_mode == buffer_mode
+    assert tensor_config.stripe_configs == [stripe_config]
+    assert tensor_config.copy_tensor == copy_tensor
+    assert tensor_config.copy_region == copy_region
+    assert hash(tensor_config) != 0
+
+
+def test_get_rolling_buffer(DRAM):
+    stripe_config = StripeConfig(
+        shape=[9, 4, 7],
+        extent=[9, 16, 21],
+        strides=[3, 5, 7],
+        order=[1, 3, 2],
+        stripes=[1, 3, 3],
+        offset=[0, 0, 0],
+    )
+    tensor = Tensor(shape=[9, 16, 21], dtype="int32", compression_ratio=0.5)
+    tensor_config = TensorConfig(
+        tensor=tensor,
+        home_region=DRAM,
+        state=TensorConfigState.BOUNDARY,
+        buffer_mode=BufferMode.ROLLING,
+        stripe_configs=[stripe_config],
+    )
+
+    assert tensor_config.get_buffer_size() == 2016
+
+
+def test_get_recompute_buffer(DRAM):
+    stripe_config = StripeConfig(
+        shape=[4, 5, 7],
+        extent=[6, 7, 14],
+        strides=[2, 3, 7],
+        order=[1, 3, 2],
+        stripes=[2, 2, 2],
+        offset=[0, 0, 0],
+    )
+    tensor = Tensor(shape=[6, 7, 14], dtype="int32", compression_ratio=0.5)
+    tensor_config = TensorConfig(
+        tensor=tensor,
+        home_region=DRAM,
+        state=TensorConfigState.BOUNDARY,
+        buffer_mode=BufferMode.RECOMPUTE,
+        stripe_configs=[stripe_config],
+    )
+
+    assert tensor_config.get_buffer_size() == 280
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])