diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp
index 5a02962fb03..3b4458ea8f9 100644
--- a/csrc/fusion_segmenter.cpp
+++ b/csrc/fusion_segmenter.cpp
@@ -3399,13 +3399,21 @@ void SegmentCandidateFinder::forwardInputs() {
         continue;
       }
 
-      if (expr->output(0)->uses().size() > 1) {
+      // expr is a unary op so there is a single output. Here we look at that
+      // output's further uses
+      const auto& output_uses = expr->output(0)->uses();
+
+      if (output_uses.size() == 1) {
+        // If there is a single use, visit it to try and extend the chain of
+        // UnaryOps.
+        to_visit.emplace_back(output_uses.at(0));
+      } else {
+        // If there are either no more uses, or more than one use, we cannot
+        // extend the chain of UnaryOps. In either case, finalize this chain by
+        // saving the expr and its output.
         excluded_inp_unary_exprs_.pushBack(expr);
         forwarded_inputs.pushBack(expr->output(0));
-        continue;
       }
-
-      to_visit.emplace_back(expr->output(0)->uses()[0]);
     }
   }
 
diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp
index cd9ee4f7392..54fb4da2af4 100644
--- a/test/test_gpu3.cpp
+++ b/test/test_gpu3.cpp
@@ -9215,6 +9215,42 @@ TEST_F(NVFuserTest, FusionDebugStreamGuard_CUDA) {
   ASSERT_EQ(ss.str(), text);
 }
 
+// Repro of https://github.com/NVIDIA/Fuser/issues/585
+TEST_F(NVFuserTest, FusionDanglingUnaryOp_CUDA) {
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  // Build a Fusion that must be segmented: segment_set() keeps the whole
+  // Fusion from being scheduled as one unit, which triggers segmentation and
+  // therefore a call to forwardInputs(). The structure of this part of the
+  // Fusion is otherwise unimportant.
+  auto size = IrBuilder::create<Scalar>(5);
+  auto tv0 = full({size}, fusion->zeroVal(), DataType::Int);
+  auto tv1 = segment_set(tv0);
+  fusion->addOutput(tv1);
+
+  // Now take in an input that has a chain of UnaryOp uses that terminates in a
+  // Val with no uses. This used to trigger a segfault in forwardInputs().
+  Val* alpha = IrBuilder::create<Scalar>(DataType::Int);
+  fusion->addInput(alpha);
+  neg(castOp(DataType::Float, alpha)); // result intentionally left unused
+
+  FusionExecutorCache executor_cache(std::move(fusion));
+
+  auto cg_outputs = executor_cache.runFusionWithInputs({11});
+
+  auto options = at::TensorOptions().dtype(at::kInt).device(at::kCUDA, 0);
+  auto aten_out = at::zeros({5}, options); // reference: 5 Int zeros, like tv0
+
+  testValidate(
+      executor_cache.fusion(),
+      cg_outputs,
+      {11},
+      {aten_out},
+      __LINE__,
+      __FILE__);
+}
+
 // Test file size should be up to 10K LoC. Create a new file for more tests.
 
 } // namespace nvfuser