diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 8d07cd9a366..6338ea8891e 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -70,6 +70,24 @@ if(EXECUTORCH_BUILD_OPENVINO_EXECUTOR_RUNNER) endif() +if(EXECUTORCH_BUILD_OPENVINO_NANOGPT_RUNNER) + # Build executor runner binary for openvino backend + list(APPEND openvino_nanogpt_runner_libs openvino_backend executorch extension_module_static extension_tensor) + + set(_openvino_nanogpt_runner__srcs + ${EXECUTORCH_ROOT}/examples/llm_manual/main.cpp + ) + add_executable(openvino_nanogpt_runner ${_openvino_nanogpt_runner__srcs}) + + list(APPEND openvino_nanogpt_runner_libs) + + target_link_libraries( + openvino_nanogpt_runner gflags portable_ops_lib ${openvino_nanogpt_runner_libs} + ) + target_compile_options(openvino_nanogpt_runner PUBLIC ${_common_compile_options}) +endif() + + # Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index bc3fde573e2..b64ebb0a7b2 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -26,6 +26,12 @@ from torch.fx.passes.operator_support import OperatorSupportBase +class PatternNode: + op_types = {} + + def __init__(self): + self.op_types = {} + class OpenvinoOperatorsSupport(OperatorSupportBase): def __init__( @@ -62,6 +68,13 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: op_type = node.target.__name__ else: op_type = str(node.target) + + if op_type in self._op_types_to_skip or node.name in self._op_names_to_skip: + print( + f"[OpenVINO Backend] The {op_type} operator with name '{node.name}' is skipped." 
+ ) + return True + supported_ops = OperatorSupport(options)._support_dict if op_type == "getitem": return True @@ -71,11 +84,6 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: else: print("Op not supported: ", "torch.ops." + str(op_type)) - if op_type in self._op_types_to_skip or node.name in self._op_names_to_skip: - print( - f"[OpenVINO Backend] The {op_type} operator with name '{node.name}' is skipped." - ) - return False return False @@ -119,6 +127,69 @@ def ops_to_not_decompose( torch.ops.aten.upsample_nearest2d.vec, ] return (ops_not_decompose, None) + + def check_pattern(self, node: torch.fx.Node, pattern: PatternNode, enabled_ops: list) -> bool: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.0 - op: ", node.op) + if node.op == "call_function": + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.1") + if ("call_function" + ":" + str(node.target.__name__)) in pattern.op_types: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - B - target: ", node.target.__name__) + pt_input_nodes = node.all_input_nodes + pattern_input_ops = pattern.op_types["call_function" + ":" + str(node.target.__name__)] + if pattern_input_ops is None: + enabled_ops.append(node) + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - C.1") + return True + if len(pt_input_nodes) != len(pattern_input_ops): + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - C.2") + return False + for i in range(len(pt_input_nodes)): + if not self.check_pattern(pt_input_nodes[i], pattern_input_ops[i], enabled_ops): + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - C.3") + return False + enabled_ops.append(node) + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - C.4") + return True + elif node.op == "get_attr": + if "get_attr" in pattern.op_types: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.2") + return True + else: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.3") + return False + elif 
node.op == "placeholder": + if "placeholder" in pattern.op_types: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.2") + return True + else: + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.3") + return False + print("\t\tDEBUG - capture_nncf_patterns - check_pattern - A.4") + return False + + def capture_nncf_patterns(self, graph_module: torch.fx.GraphModule): + const_node = PatternNode + const_node.op_types["get_attr"] = None + const_node.op_types["placeholder"] = None + bitwise_right_shift_node = PatternNode + bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor_Scalar"] = [const_node] + bitwise_and_node = PatternNode + bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [const_node] + stack_node = PatternNode + stack_node.op_types["call_function:aten.stack.default"] = [bitwise_and_node, bitwise_right_shift_node] + + print("DEBUG - capture_nncf_patterns - A") + for node in graph_module.graph.nodes: + print("\tDEBUG - capture_nncf_patterns - B - op: ", node.op, ", target: ", node.target) + if str(node.op) == "call_function" and str(node.target.__name__) == "aten.stack.default": + print("\tDEBUG - capture_nncf_patterns - C - stack found") + enabled_ops = [] + pattern_match = self.check_pattern(node, stack_node, enabled_ops) + if pattern_match: + print("\tDEBUG - capture_nncf_patterns - D - match") + for pattern_op in enabled_ops: + print(pattern_op.name) + self._op_names_to_skip.add(pattern_op.name) def partition(self, exported_program: ExportedProgram) -> PartitionResult: """ @@ -127,15 +198,44 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult: :param exported_program: The exported program. :return: A PartitionResult containing the partitioned graph and delegation tags. 
""" + + self._op_names_to_skip = set() + print("DEBUG - OpenvinoPartitioner - graph") + #print(exported_program.graph_module.code) + for node in exported_program.graph_module.graph.nodes: + if str(node.op).strip() == "call_function" and str(node.target.__name__).strip() == "aten.slice_copy.Tensor": + #if str(node.op).strip() == "call_function" and str(node.target.__name__).strip() == "aten.slice_copy.Tensor" and str(node.name).strip() == "aten_slice_copy_tensor_6": + print("\tDEBUG - OpenvinoPartitioner - slice_copy - op: ", node.op, ", target: ", node.target.__name__, ", name: ", node.name) + if not (len(node.all_input_nodes) == 3): + continue + slice_copy_in0 = node.all_input_nodes[0] + if not (str(slice_copy_in0.op).strip() == "placeholder"): + continue + print("\t\tDEBUG - OpenvinoPartitioner - slice_copy_in0 - op: ", slice_copy_in0.op, ", target: ", slice_copy_in0.target, ", name: ", slice_copy_in0.name) + slice_copy_in1 = node.all_input_nodes[1] + if not (str(slice_copy_in1.op).strip() == "call_function" and str(slice_copy_in1.target.__name__).strip() == "_local_scalar_dense.default"): + continue + print("\t\tDEBUG - OpenvinoPartitioner - slice_copy_in1 - op: ", slice_copy_in1.op, ", target: ", slice_copy_in1.target.__name__, ", name: ", slice_copy_in1.name) + slice_copy_in2 = node.all_input_nodes[2] + if not (str(slice_copy_in2.op).strip() == "call_function" and str(slice_copy_in2.target.__name__).strip() == "add"): + continue + print("\t\tDEBUG - OpenvinoPartitioner - slice_copy_in2 - op: ", slice_copy_in2.op, ", target: ", slice_copy_in2.target.__name__, ", name: ", slice_copy_in2.name) + #for input_node in node.all_input_nodes: + # print("\tDEBUG - OpenvinoPartitioner - input_node - op: ", input_node.op, ", target: ", input_node.target, ", name: ", input_node.name) + self._op_names_to_skip.add(node.name) + + self.capture_nncf_patterns(exported_program.graph_module) partitioner = CapabilityBasedPartitioner( exported_program.graph_module, 
OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip), allows_single_node_partition=True, ) partition_list = partitioner.propose_partitions() + print("DEBUG - num_parts: ", len(partition_list)) partition_tags = {} for partition in partition_list: + print("\tDEBUG - part - size: ", partition.size()) for node in partition.nodes: tag = f"tag{partition.id}" node.meta["delegation_tag"] = tag diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index c343f44a8b5..665921f50e7 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -52,3 +52,4 @@ def preprocess( model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes.getvalue()) + #return PreprocessResult(processed_bytes=model_bytes) diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 316633e9004..ccb2aa91430 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -1,2 +1,2 @@ transformers -git+https://github.com/openvinotoolkit/nncf@6b0fc1c#egg=nncf +git+https://github.com/openvinotoolkit/nncf@develop#egg=nncf diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index a3134f72b4b..20e308e6ef7 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -23,6 +23,36 @@ namespace executorch { namespace backends { namespace openvino { + +std::string scalarTypeToString(exa::ScalarType type) { + switch (type) { + case exa::ScalarType::Byte: return "Byte"; + case exa::ScalarType::Char: return "Char"; + case exa::ScalarType::Short: return "Short"; + case exa::ScalarType::Int: return "Int"; + case exa::ScalarType::Long: return "Long"; + case exa::ScalarType::Half: return "Half"; + case exa::ScalarType::Float: return "Float"; + case exa::ScalarType::Double: return "Double"; + case exa::ScalarType::Bool: return "Bool"; + case 
exa::ScalarType::BFloat16: return "BFloat16"; + case exa::ScalarType::ComplexHalf: return "ComplexHalf"; + case exa::ScalarType::ComplexFloat: return "ComplexFloat"; + case exa::ScalarType::ComplexDouble: return "ComplexDouble"; + case exa::ScalarType::QUInt8: return "QUInt8"; + case exa::ScalarType::QInt8: return "QInt8"; + case exa::ScalarType::QInt32: return "QInt32"; + case exa::ScalarType::QUInt4x2: return "QUInt4x2"; + case exa::ScalarType::QUInt2x4: return "QUInt2x4"; + case exa::ScalarType::Undefined: return "Undefined"; + case exa::ScalarType::NumOptions: return "NumOptions"; + default: + throw std::invalid_argument("Unknown ScalarType"); + } +} + + + OpenvinoBackend::OpenvinoBackend() {} bool OpenvinoBackend::is_available() const { @@ -71,7 +101,9 @@ exr::Result OpenvinoBackend::init( } // Import the model + //std::cout << "DEBUG - before import" << std::endl; auto compiled_model = core.import_model(compiled_stream, device); + //std::cout << "DEBUG - after import" << std::endl; // The processed data can be freed since the model is compiled processed->Free(); @@ -102,22 +134,111 @@ exr::Error OpenvinoBackend::execute( size_t num_outputs = infer_request->get_compiled_model().outputs().size(); // Set inputs + //std::cout << "DEBUG - OpenvinoBackend - num_inputs: " << num_inputs << std::endl; for (size_t i = 0; i < num_inputs; i++) { - auto input_tensor = args[i]->toTensor(); - ov::Shape input_shape( - input_tensor.sizes().begin(), input_tensor.sizes().end()); - - // Convert input tensor to OpenVINO tensor - ov::element::Type ov_type = - convert_to_openvino_type(input_tensor.scalar_type()); - ov::Tensor ov_input_tensor( - ov_type, input_shape, input_tensor.mutable_data_ptr()); - - infer_request->set_input_tensor(i, ov_input_tensor); + //std::cout << "DEBUG - OpenvinoBackend - input - A - i: " << i << std::endl; + + //if (args[i]->isNone()) { + // std::cout << "DEBUG - Module - forward - A - type: none" << std::endl; + //} else if (args[i]->isInt()) { + // 
std::cout << "DEBUG - Module - forward - A - type: int, val: " << args[i]->toInt() << std::endl; + //} else if (args[i]->isDouble()) { + // std::cout << "DEBUG - Module - forward - A - type: double" << std::endl; + //} else if (args[i]->isBool()) { + // std::cout << "DEBUG - Module - forward - A - type: bool" << std::endl; + //} else if (args[i]->isScalar()) { + // std::cout << "DEBUG - Module - forward - A - type: scalar" << std::endl; + //} else if (args[i]->isTensor()) { + // std::cout << "DEBUG - Module - forward - A - type: tensor, shape: ["; + // for (int j=0; jtoTensor().dim(); j++) { + // std::cout << args[i]->toTensor().size(j) << ", "; + // } + // std::cout << "]" << std::endl; + //} else if (args[i]->isString()) { + // std::cout << "DEBUG - Module - forward - A - type: string" << std::endl; + //} else if (args[i]->isIntList()) { + // std::cout << "DEBUG - Module - forward - A - type: int_list" << std::endl; + //} else if (args[i]->isBoolList()) { + // std::cout << "DEBUG - Module - forward - A - type: bool_list" << std::endl; + //} else if (args[i]->isDoubleList()) { + // std::cout << "DEBUG - Module - forward - A - type: double_list" << std::endl; + //} else if (args[i]->isTensorList()) { + // std::cout << "DEBUG - Module - forward - A - type: tensor_list" << std::endl; + //} else if (args[i]->isListOptionalTensor()) { + // std::cout << "DEBUG - Module - forward - A - type: list_optional_tensor" << std::endl; + //} else { + // std::cout << "DEBUG - Module - forward - A - type: no type available" << std::endl; + //} + + if (args[i]->isInt()) { + //std::cout << "DEBUG - OpenvinoBackend - input - B.1" << std::endl; + //auto input_tensor = args[i]->toInt(); + //std::cout << "DEBUG - OpenvinoBackend - input - B.2" << std::endl; + //ov::Shape input_shape( + // input_tensor.sizes().begin(), input_tensor.sizes().end()); + + //std::cout << "DEBUG - OpenvinoBackend - input - B.3" << std::endl; + // Convert input tensor to OpenVINO tensor + //std::cout << "DEBUG - 
OpenvinoBackend - input - B.4" << std::endl; + //int64_t val = args[i]->toInt(); + //int64_t val = i; + int64_t *val = &(args[i]->payload.copyable_union.as_int); + //std::cout << "DEBUG - OpenvinoBackend - input - B.5 - val: " << val << std::endl; + //ov::Tensor ov_input_tensor(ov::element::i64, ov::Shape{}, &val); + //std::vector val = {args[i]->toInt()}; + //ov::Tensor ov_input_tensor(ov::element::i64, ov::Shape{1}, &val); + ov::Tensor ov_input_tensor(ov::element::i64, ov::Shape{1}, val); + //std::cout << "\tDEBUG - OpenvinoBackend - input - int - val: " << ((int64_t*)(ov_input_tensor.data()))[0] << ", byte_size: " << ov_input_tensor.get_byte_size() << std::endl; + + infer_request->set_input_tensor(i, ov_input_tensor); + //std::cout << "DEBUG - OpenvinoBackend - input - B.7" << std::endl; + } else { + //std::cout << "DEBUG - OpenvinoBackend - input - C.1" << std::endl; + auto input_tensor = args[i]->toTensor(); + //std::cout << "DEBUG - OpenvinoBackend - input - C.2" << std::endl; + ov::Shape input_shape( + input_tensor.sizes().begin(), input_tensor.sizes().end()); + + //std::cout << "DEBUG - OpenvinoBackend - input - C.3" << std::endl; + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = + convert_to_openvino_type(input_tensor.scalar_type()); + //std::cout << "DEBUG - OpenvinoBackend - input - C.4" << std::endl; + ov::Tensor ov_input_tensor( + ov_type, input_shape, input_tensor.mutable_data_ptr()); + //std::cout << "DEBUG - OpenvinoBackend - input - C.5" << std::endl; + + infer_request->set_input_tensor(i, ov_input_tensor); + //std::cout << "DEBUG - OpenvinoBackend - input - C.6" << std::endl; + + //if (ov_type == ov::element::i64) { + // int64_t sum = 0; + // auto data_ptr = ov_input_tensor.data(); + // for (size_t j=0; j < ov_input_tensor.get_byte_size()/sizeof(int64_t); j++) { + // sum += data_ptr[j]; + // } + // //std::cout << "\tDEBUG - OpenvinoBackend - input - tensor - shape: " << ov_input_tensor.get_shape() << ", type: " << 
ov_input_tensor.get_element_type() << ", sum_of_values: " << sum << std::endl; + //} else { + // float sum = 0; + // auto data_ptr = ov_input_tensor.data(); + // for (size_t j=0; j < ov_input_tensor.get_byte_size()/sizeof(float); j++) { + // sum += data_ptr[j]; + // } + // //std::cout << "\tDEBUG - OpenvinoBackend - input - tensor - shape: " << ov_input_tensor.get_shape() << ", type: " << ov_input_tensor.get_element_type() << ", sum_of_values: " << sum << std::endl; + //} + //std::cout << "\tDEBUG - OpenvinoBackend - input - tensor - shape: " << ov_input_tensor.get_shape() << ", type: " << ov_input_tensor.get_element_type() << std::endl; + } } // Set outputs + //std::cout << "DEBUG - OpenvinoBackend - num_outputs: " << num_outputs << std::endl; for (size_t i = 0; i < num_outputs; i++) { + //args[num_inputs + i]->toTensor().unsafeGetTensorImpl()->set_size(1,1); + //std::cout << "DEBUG - OpenvinoBackend output - i: " << i << " - type: tensor, shape: ["; + //for (int j=0; jtoTensor().dim(); j++) { + // std::cout << args[num_inputs + i]->toTensor().size(j) << ", "; + //} + //std::cout << "]" << std::endl; auto output_tensor = args[num_inputs + i]->toTensor(); ov::Shape output_shape( output_tensor.sizes().begin(), output_tensor.sizes().end()); @@ -133,7 +254,23 @@ exr::Error OpenvinoBackend::execute( // Execute the inference infer_request->infer(); - + //for (size_t i = 0; i < num_outputs; i++) { + // auto out_t = infer_request->get_output_tensor(i); + // float sum = 0; + // auto data_ptr = out_t.data(); + // for (size_t j=0; j < out_t.get_byte_size()/sizeof(float); j++) { + // sum += data_ptr[j]; + // } + // //std::cout << "\tDEBUG - OpenvinoBackend output - after infer tensor - shape: " << out_t.get_shape() << ", type: " << out_t.get_element_type() << ", sum_of_values: " << sum << std::endl; + //} + //auto out_t = infer_request->get_output_tensor(0); + //std::cout << "DEBUG - OpenvinoBackend output - after infer tensor - shape: " << out_t.get_shape() << std::endl; + 
//for (int j=0; jtoTensor().dim(); j++) { + // std::cout << args[num_inputs + i]->toTensor().size(j) << ", "; + //} + //std::cout << "]" << std::endl; + + //std::cout << "DEBUG - OpenvinoBackend - DD" << std::endl; return exr::Error::Ok; } @@ -162,13 +299,18 @@ void OpenvinoBackend::destroy(exr::DelegateHandle* handle) const { ov::element::Type OpenvinoBackend::convert_to_openvino_type( exa::ScalarType scalar_type) const { + //std::cout << "DEBUG - scalar_type: " << scalarTypeToString(scalar_type) << std::endl; switch (scalar_type) { case exa::ScalarType::Float: return ov::element::f32; + case exa::ScalarType::Half: + return ov::element::f16; case exa::ScalarType::Int: return ov::element::i32; case exa::ScalarType::Char: return ov::element::i8; + case exa::ScalarType::Byte: + return ov::element::u8; case exa::ScalarType::Long: return ov::element::i64; case exa::ScalarType::Bool: diff --git a/backends/openvino/utils.py b/backends/openvino/utils.py new file mode 100644 index 00000000000..debefdd1a35 --- /dev/null +++ b/backends/openvino/utils.py @@ -0,0 +1,150 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os + +from typing import Any, Dict, Optional, Tuple, Union + +import executorch.exir as exir + +import torch +from executorch.exir import EdgeProgramManager, ExecutorchProgramManager +from executorch.exir.program._program import to_edge_with_preserved_ops +from executorch.exir.tracer import Value +from torch.export import export, export_for_training, ExportedProgram + + +_EDGE_COMPILE_CONFIG = exir.EdgeCompileConfig( + _check_ir_validity=True, + _skip_dim_order=True, # TODO(T189114319): Reuse dim order op after solving the ios oss issue +) + + +def _to_core_aten( + model: Union[torch.fx.GraphModule, torch.nn.Module], + example_inputs: Tuple[Value, ...], + *, + example_kwarg_inputs: Optional[Dict] = None, + dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None, + strict=True, + verbose=True, +) -> ExportedProgram: + # post autograd export. eventually this will become .to_core_aten + if not isinstance(model, torch.fx.GraphModule) and not isinstance( + model, torch.nn.Module + ): + raise ValueError( + f"Expected passed in model to be an instance of fx.GraphModule, got {type(model)}" + ) + core_aten_ep = export( + model, + example_inputs, + example_kwarg_inputs, + dynamic_shapes=dynamic_shapes, + strict=strict, + ) + if verbose: + logging.info(f"Core ATen graph:\n{core_aten_ep.graph}") + return core_aten_ep + + +def _core_aten_to_edge( + core_aten_exir_ep: ExportedProgram, + edge_constant_methods: Optional[Dict[str, Any]] = None, + edge_compile_config=None, + verbose=True, +) -> EdgeProgramManager: + if not edge_compile_config: + edge_compile_config = exir.EdgeCompileConfig( + _check_ir_validity=False, # quant ops currently break ir verification + ) + edge_manager: EdgeProgramManager = to_edge_with_preserved_ops( + core_aten_exir_ep, + constant_methods=edge_constant_methods, + compile_config=edge_compile_config, + preserve_ops=[torch.ops.aten.stack.default,], + ) + if verbose: + logging.info(f"Exported 
graph:\n{edge_manager.exported_program()}") + return edge_manager + + +def export_to_edge( + model: Union[torch.fx.GraphModule, torch.nn.Module], + example_inputs: Tuple[Value, ...], + *, + example_kwarg_inputs: Optional[Dict] = None, + dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None, + edge_constant_methods: Optional[Dict[str, Any]] = None, + edge_compile_config=_EDGE_COMPILE_CONFIG, + strict=True, + verbose=True, +) -> EdgeProgramManager: + print("DEBUG - executorch - openvino_utils") + core_aten_ep = _to_core_aten( + model, + example_inputs, + example_kwarg_inputs=example_kwarg_inputs, + dynamic_shapes=dynamic_shapes, + strict=strict, + verbose=verbose, + ) + return _core_aten_to_edge( + core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose + ) + + +def export_to_exec_prog( + model: Union[torch.fx.GraphModule, torch.nn.Module], + example_inputs: Tuple[Value, ...], + *, + example_kwarg_inputs: Optional[Dict[str, Any]] = None, + dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None, + edge_constant_methods: Optional[Dict[str, Any]] = None, + edge_compile_config=_EDGE_COMPILE_CONFIG, + backend_config=None, + strict=True, +) -> ExecutorchProgramManager: + m = model.eval() + # pre-autograd export. eventually this will become torch.export + m = export_for_training(m, example_inputs, strict=True).module() + + core_aten_ep = _to_core_aten( + m, + example_inputs, + example_kwarg_inputs=example_kwarg_inputs, + dynamic_shapes=dynamic_shapes, + strict=strict, + ) + + edge_m = _core_aten_to_edge( + core_aten_ep, edge_constant_methods, edge_compile_config + ) + + exec_prog = edge_m.to_executorch(backend_config) + return exec_prog + + +def save_pte_program( + prog: ExecutorchProgramManager, model_name: str, output_dir: str = "" +) -> str: + if model_name.endswith(".pte"): + filename = model_name + else: + filename = os.path.join(output_dir, f"{model_name}.pte") + + try: + # Write program to file. 
+ with open(filename, "wb") as file: + prog.write_to_file(file) + logging.info(f"Saved exported program to {filename}") + # Write data to file/s. + prog.write_tensor_data_to_file(outdir=output_dir) + except Exception as e: + logging.error(f"Error while saving to {filename}: {e}") + + return filename diff --git a/examples/models/llama/CMakeLists.txt b/examples/models/llama/CMakeLists.txt index 952cdf1b65d..f0be54f8806 100644 --- a/examples/models/llama/CMakeLists.txt +++ b/examples/models/llama/CMakeLists.txt @@ -173,6 +173,14 @@ if(TARGET qnn_executorch_backend) target_link_options_shared_lib(qnn_executorch_backend) endif() +# Openvino backend +if(TARGET openvino_backend) + find_package(OpenVINO REQUIRED) + target_link_libraries(openvino_backend INTERFACE openvino::runtime executorch_core) + list(APPEND link_libraries openvino_backend) + target_link_options_shared_lib(openvino_backend) +endif() + # MPS backend if(TARGET mpsdelegate) list( diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 3a3102886f8..9cd906ad2f3 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -38,6 +38,7 @@ from executorch.extension.llm.export.partitioner_lib import ( get_coreml_partitioner, get_mps_partitioner, + get_openvino_partitioner, get_qnn_partitioner, get_vulkan_partitioner, get_xnnpack_partitioner, @@ -441,6 +442,7 @@ def build_args_parser() -> argparse.ArgumentParser: action="store_true", help="Delegate llama2 to qnn backend (Qualcomm), please use it --kv_cahce=True", ) + parser.add_argument("--openvino", action="store_true") parser.add_argument( "--expand_rope_table", @@ -546,6 +548,13 @@ def build_args_parser() -> argparse.ArgumentParser: action="store_true", help="If true, stops right after torch.export() and saves the exported model.", ) + + parser.add_argument( + "--nncf_compression", + default=False, + action="store_true", + help="If true, stops right after 
torch.export() and saves the exported model.", + ) return parser @@ -851,6 +860,7 @@ def _to_edge_and_lower_llama( # noqa: C901 mps: bool = False, coreml: bool = False, qnn: bool = False, + openvino: bool = False, dtype_override: str = "fp32", enable_dynamic_shape: bool = True, use_kv_cache: bool = False, @@ -887,6 +897,10 @@ def _to_edge_and_lower_llama( # noqa: C901 partitioners.append(get_mps_partitioner(use_kv_cache)) modelname = f"mps_{modelname}" + if openvino: + partitioners.append(get_openvino_partitioner(use_kv_cache)) + modelname = f"openvino_{modelname}" + if coreml: coreml_partitioner = get_coreml_partitioner( coreml_ios, @@ -1063,6 +1077,7 @@ def _export_llama(args) -> LLMEdgeManager: # noqa: C901 mps=args.mps, coreml=args.coreml, qnn=args.qnn, + openvino=args.openvino, dtype_override=args.dtype_override, enable_dynamic_shape=args.enable_dynamic_shape, use_kv_cache=args.use_kv_cache, @@ -1221,6 +1236,7 @@ def _load_llama_model( use_legacy_export=args.qnn, save_exported_program=args.export_only, verbose=verbose, + nncf_compression=args.nncf_compression, metadata=_load_llama_model_metadata( weight_type, use_kv_cache, diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 323311caeea..6339da0b311 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -15,7 +15,7 @@ from enum import Enum from typing import Any, Callable, Dict, List, Optional, Tuple from unittest.mock import patch - +import nncf import torch from executorch.backends.transforms.duplicate_dynamic_quant_chain import ( DuplicateDynamicQuantChainPass, @@ -41,6 +41,7 @@ from torch.export import export_for_training, ExportedProgram from torch.nn.attention import SDPBackend from torchao.utils import unwrap_tensor_subclass +from functools import partial FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" logging.basicConfig(level=logging.INFO, format=FORMAT) @@ -99,6 +100,7 @@ def __init__( dynamic_shapes: 
Optional[Any] = None, use_legacy_export: bool = False, save_exported_program: bool = False, + nncf_compression: bool = False ): # Store necessary constructor arguments. self.model = model @@ -120,6 +122,7 @@ def __init__( self.dynamic_shapes = dynamic_shapes self.use_legacy_export = use_legacy_export self.save_exported_program = save_exported_program + self.nncf_compression = nncf_compression # Note: treat this as the source of truth for the result of # torch.export'ing a model. If the overall ExportedProgram is needed, @@ -409,6 +412,36 @@ def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManage DuplicateDynamicQuantChainPass()(m) self.pre_autograd_graph_module = m return self + elif (self.nncf_compression): + print("DEBUG - executorch - builder - quantize - A") + tokenizer = get_tokenizer(self.tokenizer_path) + + def transform_fn( + prompts: str, tokenizer + ): + tokenized_text = tokenizer.encode(prompts, bos=False, eos=False) + logging.error(tokenized_text) + + inputs = () + inputs = ( + torch.tensor(tokenized_text).unsqueeze(0), + {"input_pos": torch.tensor([0])}, + ) + + return inputs + + self.calibration_data = [self.calibration_data] if isinstance(self.calibration_data, str) else self.calibration_data + self.calibration_data = [word for prompt in self.calibration_data for word in prompt.split()] if not self.dynamic_shapes else self.calibration_data + logging.error(self.calibration_data) + self.pre_autograd_graph_module = nncf.compress_weights( + self.pre_autograd_graph_module, + dataset=nncf.Dataset(self.calibration_data, transform_func=partial(transform_fn, tokenizer=tokenizer)), + mode=nncf.CompressWeightsMode.INT4_SYM, + ratio=0.8, + sensitivity_metric=nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION, + ) + print("DEBUG - executorch - builder - quantize - B") + return self else: logging.info("No quantizer provided, passing...") return self @@ -417,6 +450,7 @@ def export_to_edge(self) -> "LLMEdgeManager": """ Export the model to Edge 
dialect and retrieve a LLMEdgeManager. """ + print("DEBUG - executorch - builder - export_to_edge - A") dynamic_shape = self._get_dynamic_shape() edge_config = self._get_edge_config() @@ -436,6 +470,8 @@ def export_to_edge(self) -> "LLMEdgeManager": ) with override_export_behaviour: + if (self.nncf_compression): + from executorch.backends.openvino.utils import export_to_edge self.edge_manager = export_to_edge( self.pre_autograd_graph_module, # pyre-fixme[6] self.example_inputs, @@ -445,6 +481,7 @@ def export_to_edge(self) -> "LLMEdgeManager": edge_compile_config=edge_config, verbose=self.verbose, ) + print("DEBUG - executorch - builder - export_to_edge - B") return self def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManager": diff --git a/extension/llm/export/partitioner_lib.py b/extension/llm/export/partitioner_lib.py index 20604bbf635..ade3bec094f 100644 --- a/extension/llm/export/partitioner_lib.py +++ b/extension/llm/export/partitioner_lib.py @@ -64,6 +64,26 @@ def get_mps_partitioner(use_kv_cache: bool = False): return MPSPartitioner(compile_specs) # pyre-fixme[16] +def get_openvino_partitioner(use_kv_cache: bool = False): + from executorch.exir.backend.backend_details import CompileSpec + + assert ( + use_kv_cache is True + ), "OpenVINO backend currently only supports static shape and use_kv_cache=True is the only way to support it at the moment" + try: + # pyre-ignore Undefined import [21]: Could not find a module corresponding to import `executorch.backends.openvino.partitioner`. 
+ from executorch.backends.openvino.partitioner import ( + OpenvinoPartitioner, + ) + except ImportError: + raise ImportError( + "Please install the OpenVINO backend following backends/openvino/README.md" + ) + + compile_specs = [CompileSpec("device", "CPU".encode())] + return OpenvinoPartitioner(compile_specs) # pyre-fixme[16] + + def get_coreml_partitioner( ios: int = 15, embedding_quantize: Optional[str] = None, diff --git a/tools/cmake/executorch-config.cmake b/tools/cmake/executorch-config.cmake index aa5776163a9..baf91f3fd1a 100644 --- a/tools/cmake/executorch-config.cmake +++ b/tools/cmake/executorch-config.cmake @@ -68,6 +68,7 @@ set(lib_list mpsdelegate neuron_backend qnn_executorch_backend + openvino_backend portable_ops_lib custom_ops extension_module