From dba3ff52c501a66736b436a455de2af6dde50863 Mon Sep 17 00:00:00 2001
From: Santeri Valjakka
Date: Fri, 24 Mar 2023 17:18:36 +0200
Subject: [PATCH] [IMP] queue_job: Vacuum fewer jobs more often to avoid
 timeouts

In databases where many jobs are created, enough jobs can be generated
in a short enough time that the autovacuum cron eventually starts to
time out. Since there is no cursor commit between unlinks, a timeout
also rolls back all the unlinks done so far, so no jobs end up being
deleted. After the cron has timed out once, it is very likely to keep
timing out as more jobs accumulate in the queue.

By running the cron more often but removing fewer jobs at once, the
timeouts can be avoided without adding cursor commits.

For backwards compatibility, the limit per channel defaults to None, so
in databases where the cron already exists with the old execution
interval, the behaviour remains the same.
---
 queue_job/data/queue_data.xml |  4 ++--
 queue_job/models/queue_job.py | 39 ++++++++++++++++++++---------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/queue_job/data/queue_data.xml b/queue_job/data/queue_data.xml
index ca5a747746..4fb47e377c 100644
--- a/queue_job/data/queue_data.xml
+++ b/queue_job/data/queue_data.xml
@@ -22,11 +22,11 @@
         <field name="interval_number">1</field>
-        <field name="interval_type">days</field>
+        <field name="interval_type">hours</field>
         <field name="numbercall">-1</field>
         <field name="state">code</field>
-        <field name="code">model.autovacuum()</field>
+        <field name="code">model.autovacuum(limit_per_channel=1000)</field>
diff --git a/queue_job/models/queue_job.py b/queue_job/models/queue_job.py
index 58debcc150..736852b0c3 100644
--- a/queue_job/models/queue_job.py
+++ b/queue_job/models/queue_job.py
@@ -390,28 +390,33 @@ def _needaction_domain_get(self):
         """
         return [("state", "=", "failed")]
 
-    def autovacuum(self):
-        """Delete all jobs done based on the removal interval defined on the
-        channel
+    def autovacuum(self, limit_per_channel=None):
+        """Delete done jobs based on the removal interval defined on the
+        channel.
 
         Called from a cron.
+
+        Only deletes a limited number of jobs from each channel, given
+        as an argument, to avoid timing out in databases with a lot of
+        jobs to delete.
+        The rate at which done jobs are deleted can be tuned via the
+        limit and the execution interval of the cron.
         """
         for channel in self.env["queue.job.channel"].search([]):
             deadline = datetime.now() - timedelta(days=int(channel.removal_interval))
-            while True:
-                jobs = self.search(
-                    [
-                        "|",
-                        ("date_done", "<=", deadline),
-                        ("date_cancelled", "<=", deadline),
-                        ("channel", "=", channel.complete_name),
-                    ],
-                    limit=1000,
-                )
-                if jobs:
-                    jobs.unlink()
-                else:
-                    break
+            jobs = self.search(
+                [
+                    "|",
+                    ("date_done", "<=", deadline),
+                    ("date_cancelled", "<=", deadline),
+                    ("channel", "=", channel.complete_name),
+                ],
+                limit=limit_per_channel,
+            )
+            # Unlink in smaller batches for performance reasons
+            for job_ids in self._cr.split_for_in_conditions(jobs.ids, 1000):
+                self.browse(job_ids).unlink()
+        return True
 
     def requeue_stuck_jobs(self, enqueued_delta=5, started_delta=0):
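
For reference, below is a minimal standalone sketch of the batching pattern
the new autovacuum relies on: chunk a large list of ids into fixed-size
tuples and delete one chunk at a time, mirroring the chunking that Odoo's
Cursor.split_for_in_conditions performs. It runs outside Odoo; split_every,
vacuum_in_batches and delete_jobs are illustrative names, not part of
queue_job, and delete_jobs stands in for self.browse(ids).unlink().

    from itertools import islice
    from typing import Callable, Iterable, Iterator, Tuple


    def split_every(size: int, ids: Iterable[int]) -> Iterator[Tuple[int, ...]]:
        """Yield tuples of at most `size` ids, preserving order."""
        iterator = iter(ids)
        chunk = tuple(islice(iterator, size))
        while chunk:
            yield chunk
            chunk = tuple(islice(iterator, size))


    def vacuum_in_batches(
        job_ids: Iterable[int],
        delete_jobs: Callable[[Tuple[int, ...]], None],
        batch_size: int = 1000,
    ) -> int:
        """Delete jobs in fixed-size batches; return how many were deleted."""
        deleted = 0
        for chunk in split_every(batch_size, job_ids):
            delete_jobs(chunk)  # in Odoo: self.browse(chunk).unlink()
            deleted += len(chunk)
        return deleted


    if __name__ == "__main__":
        # 2500 fake job ids are processed as 3 batches (1000, 1000, 500).
        total = vacuum_in_batches(range(2500), lambda chunk: None)
        print(total)  # 2500

With limit_per_channel=1000 and the hourly cron, each run deletes at most
1000 jobs per channel, so a temporary backlog is drained over several short
runs instead of one long transaction that risks timing out.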