Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@
* Fix-up metric constant names
* Apply label to locker metrics to indicate cursor presence
* Pin Prometheus to a GoCardless feature branch [link](https://github.com/gocardless/prometheus_client_ruby/tree/gc_production_branch_do_not_push) that is incompatible with upstream. This version has breaking API changes and includes new features such as pluggable data stores.
* Add `failed` label to queue collector middleware

### 1.0.0 (2018-07-20)

14 changes: 8 additions & 6 deletions lib/que/middleware/queue_collector.rb
@@ -10,20 +10,20 @@ module Middleware
class QueueCollector
Queued = Prometheus::Client::Gauge.new(
:que_queue_queued,
docstring: "Number of jobs in the queue, by job_class/priority/due",
labels: %i[queue job_class priority due],
docstring: "Number of jobs in the queue, by job_class/priority/due/failed",
labels: %i[queue job_class priority due failed],
)
DeadTuples = Prometheus::Client::Gauge.new(
:que_dead_tuples,
docstring: "Number of dead tuples in the que_jobs table",
)

QUEUE_VIEW_SQL = <<~SQL
select queue, job_class, priority
, (case when (retryable AND run_at < now()) then 'true' else 'false' end) as due
, (case when (NOT retryable AND error_count > 0) then 'true' else 'false' end) as failed
Author: Alternatively we could do `(NOT retryable AND failed_at IS NOT NULL)`. I opted for this approach as `failed_at` isn't updated anywhere in this repository: it is updated over in the que-failure gem.

Given the difference in approaches/legacy here, I can't be certain which one is more correct, so any comments would be useful.

Contributor: This project really needs some TLC. Ideally que-failure would be part of this gem, and we'd clean out all the rest of the crap. Given it's not, just assume we only ever use it with que-failure for now.

, count(*)
from que_jobs
group by 1, 2, 3, 4;
from que_jobs
group by 1, 2, 3, 4, 5;
SQL
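The two CASE expressions above can be mirrored in plain Ruby to make the classification explicit. This is an illustrative sketch only; the `due?`/`failed?` helpers are invented here and do not exist in the gem.

```ruby
# Pure-Ruby mirror of the SQL CASE expressions above (illustration only).

# A job is "due" when it is still retryable and its run_at has passed.
def due?(retryable:, run_at:, now: Time.now)
  retryable && run_at < now
end

# A job is "failed" when it is no longer retryable but has recorded errors.
def failed?(retryable:, error_count:)
  !retryable && error_count > 0
end
```

Under these definitions a job can be neither due nor failed (e.g. scheduled in the future), but never both, since `retryable` gates the two cases in opposite directions.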

DEAD_TUPLES_SQL = <<~SQL
@@ -58,7 +58,8 @@ def call(env)
# metric collector from hurting the database when it's already under pressure.
Que.execute("set local statement_timeout='500ms';")

# Now we can safely update our gauges, touching only those that exist in our queue
# Now we can safely update our gauges, touching only those that exist
# in our queue
Que.execute(QUEUE_VIEW_SQL).each do |labels|
Queued.set(
labels["count"],
@@ -67,6 +68,7 @@
job_class: labels["job_class"],
priority: labels["priority"],
due: labels["due"],
failed: labels["failed"],
},
)
end
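For illustration, here is how one result row maps onto the gauge's label set. The row values are invented; only the shape mirrors the hashes `Que.execute(QUEUE_VIEW_SQL)` yields in the loop above.

```ruby
# Hypothetical row, shaped like the hashes Que.execute(QUEUE_VIEW_SQL) yields.
# Values are invented for illustration.
row = {
  "queue"     => "default",
  "job_class" => "FakeJob",
  "priority"  => "1",
  "due"       => "true",
  "failed"    => "false",
  "count"     => 2.0,
}

# The label set passed to Queued.set, mirroring the loop above.
labels = {
  queue:     row["queue"],
  job_class: row["job_class"],
  priority:  row["priority"],
  due:       row["due"],
  failed:    row["failed"],
}
```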
20 changes: 16 additions & 4 deletions spec/lib/que/middleware/queue_collector_spec.rb
@@ -18,6 +18,12 @@
FakeJob.enqueue(run_at: due_now, priority: 1, retryable: false)
FakeJob.enqueue(run_at: pending_now, priority: 1) # not due

# Fail a job in the same way it would be failed if the worker had run it
job_to_fail = FakeJob.enqueue(run_at: due_now, priority: 20, retryable: false)
error = StandardError.new("bang")
error.set_backtrace(caller)
Que::Worker.new.send(:handle_job_failure, error, job_to_fail.attrs)

# not due, different queue
FakeJob.enqueue(run_at: pending_now, queue: "another", priority: 1)
end
@@ -27,10 +33,16 @@
collector.call({})

expect(described_class::Queued.values).to eql(
{ queue: "default", job_class: "FakeJob", priority: "1", due: "true" } => 2.0,
{ queue: "default", job_class: "FakeJob", priority: "10", due: "true" } => 1.0,
{ queue: "default", job_class: "FakeJob", priority: "1", due: "false" } => 2.0,
{ queue: "another", job_class: "FakeJob", priority: "1", due: "false" } => 1.0,
{ queue: "default", job_class: "FakeJob", priority: "1",
due: "true", failed: "false" } => 2.0,
{ queue: "default", job_class: "FakeJob", priority: "10",
due: "true", failed: "false" } => 1.0,
{ queue: "default", job_class: "FakeJob", priority: "1",
due: "false", failed: "false" } => 2.0,
{ queue: "another", job_class: "FakeJob", priority: "1",
due: "false", failed: "false" } => 1.0,
{ queue: "default", job_class: "FakeJob", priority: "20",
due: "false", failed: "true" } => 1.0,
)

# It's not easy to predict the number of dead tuples deterministically, so we just