diff --git a/CHANGELOG.md b/CHANGELOG.md index 26dbd081..677ab099 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Fix-up metric constant names * Apply label to locker metrics to indicate cursor presence * Pin Prometheus to a GoCardless feature branch [link](https://github.com/gocardless/prometheus_client_ruby/tree/gc_production_branch_do_not_push) that is incompatible with upstream. This version has breaking API changes and includes new features such as pluggable data stores. +* Add `failed` label to queue collector middleware ### 1.0.0 (2018-07-20) diff --git a/lib/que/middleware/queue_collector.rb b/lib/que/middleware/queue_collector.rb index cb8c7062..92bea071 100644 --- a/lib/que/middleware/queue_collector.rb +++ b/lib/que/middleware/queue_collector.rb @@ -10,20 +10,20 @@ module Middleware class QueueCollector Queued = Prometheus::Client::Gauge.new( :que_queue_queued, - docstring: "Number of jobs in the queue, by job_class/priority/due", - labels: %i[queue job_class priority due], + docstring: "Number of jobs in the queue, by job_class/priority/due/failed", + labels: %i[queue job_class priority due failed], ) DeadTuples = Prometheus::Client::Gauge.new( :que_dead_tuples, docstring: "Number of dead tuples in the que_jobs table", ) - QUEUE_VIEW_SQL = <<~SQL select queue, job_class, priority , (case when (retryable AND run_at < now()) then 'true' else 'false' end) as due + , (case when (NOT retryable AND error_count > 0) then 'true' else 'false' end) as failed , count(*) - from que_jobs - group by 1, 2, 3, 4; + from que_jobs + group by 1, 2, 3, 4, 5; SQL DEAD_TUPLES_SQL = <<~SQL @@ -58,7 +58,8 @@ def call(env) # metric collector from hurting the database when it's already under pressure. Que.execute("set local statement_timeout='500ms';") - # Now we can safely update our gauges, touching only those that exist in our queue + # Now we can safely update our gauges, touching only those that exist + # in our queue Que.execute(QUEUE_VIEW_SQL).each do |labels| Queued.set( labels["count"], @@ -67,6 +68,7 @@ def call(env) job_class: labels["job_class"], priority: labels["priority"], due: labels["due"], + failed: labels["failed"], }, ) end diff --git a/spec/lib/que/middleware/queue_collector_spec.rb b/spec/lib/que/middleware/queue_collector_spec.rb index 2bc17094..a18c8d82 100644 --- a/spec/lib/que/middleware/queue_collector_spec.rb +++ b/spec/lib/que/middleware/queue_collector_spec.rb @@ -18,6 +18,12 @@ FakeJob.enqueue(run_at: due_now, priority: 1, retryable: false) FakeJob.enqueue(run_at: pending_now, priority: 1) # not due + # Fail a job in the same way it would be failed if the worker had run it + job_to_fail = FakeJob.enqueue(run_at: due_now, priority: 20, retryable: false) + error = StandardError.new("bang") + error.set_backtrace(caller) + Que::Worker.new.send(:handle_job_failure, error, job_to_fail.attrs) + # not due, different queue FakeJob.enqueue(run_at: pending_now, queue: "another", priority: 1) end @@ -27,10 +33,16 @@ collector.call({}) expect(described_class::Queued.values).to eql( - { queue: "default", job_class: "FakeJob", priority: "1", due: "true" } => 2.0, - { queue: "default", job_class: "FakeJob", priority: "10", due: "true" } => 1.0, - { queue: "default", job_class: "FakeJob", priority: "1", due: "false" } => 2.0, - { queue: "another", job_class: "FakeJob", priority: "1", due: "false" } => 1.0, + { queue: "default", job_class: "FakeJob", priority: "1", + due: "true", failed: "false" } => 2.0, + { queue: "default", job_class: "FakeJob", priority: "10", + due: "true", failed: "false" } => 1.0, + { queue: "default", job_class: "FakeJob", priority: "1", + due: "false", failed: "false" } => 2.0, + { queue: "another", job_class: "FakeJob", priority: "1", + due: "false", failed: "false" } => 1.0, + { queue: "default", job_class: "FakeJob", priority: "20", + due: "false", failed: "true" } => 1.0, ) # It's not easy to predict the number of dead tuples deterministically, so we just