From 5b998bdbf110966a3c6f9f20a20b354b43e4a793 Mon Sep 17 00:00:00 2001
From: Matt Ezell
Date: Sat, 8 Dec 2012 15:11:04 -0500
Subject: [PATCH 1/7] Add support for Valgrind's Helgrind thread error detector tool

Helgrind is usable without any special support, but adding explicit
support allows known-safe data races or lock order violations to be
ignored.

Make sure you have the Valgrind header file 'helgrind.h' and configure
using '--with-helgrind'.

Set the environment variable PBSDEBUG=1 and then run:
valgrind --tool=helgrind pbs_server

This commit also tells Helgrind to ignore three known data races on
global variables:
- LOGLEVEL
- pbs_tcp_timeout
- last_task_check_time
---
 configure.ac           | 36 +++++++++++++++++++++++++++++++++++-
 src/server/pbsd_main.c | 30 ++++++++++++++++++++++--------
 2 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index c3ca9f1730..32fc08f818 100644
--- a/configure.ac
+++ b/configure.ac
@@ -991,7 +991,41 @@ fi
 
 
-
+dnl
+dnl enable helgrind support
+dnl
+AC_MSG_CHECKING([whether to enable helgrind support])
+helgrinddir=disabled
+AC_ARG_WITH(helgrind,
+  [  --with-helgrind=DIR     Directory that holds the helgrind headers.
+                          On Linux, 'yes' uses /usr/include/valgrind.],
+  [helgrinddir=$withval])
+case "$helgrinddir" in
+  disabled) ;;
+  no) helgrinddir=disabled ;;
+  yes)
+    case "${PBS_MACH}" in
+      linux) AC_DEFINE([HELGRIND], 1, [Define to enable helgrind support])
+             CFLAGS="$CFLAGS -I/usr/include/valgrind"
+             AC_CHECK_HEADERS([helgrind.h],
+               [CFLAGS="$CFLAGS -DHELGRIND"],
+               [AC_MSG_ERROR(Helgrind headers not found. Try installing valgrind-devel)],
+               []);;
+      *) AC_MSG_ERROR([on this platform, --with-helgrind requires a full path to a directory]);;
+    esac ;;
+  *)
+    case "${PBS_MACH}" in
+      linux) AC_DEFINE([HELGRIND], 1, [Define to enable helgrind support])
+             CFLAGS="$CFLAGS -I$helgrinddir"
+             AC_CHECK_HEADERS([helgrind.h],
+               [CFLAGS="$CFLAGS -DHELGRIND"],
+               [AC_MSG_ERROR([Helgrind headers not found. Is $helgrinddir correct?])],
+               [])
+             ;;
+      *) AC_MSG_ERROR([--with-helgrind takes a full path to a directory]);;
+    esac ;;
+esac
+AC_MSG_RESULT([$helgrinddir])
 
 
 dnl
 dnl add an option to specify a different path where TORQUE should look for HWLOC
diff --git a/src/server/pbsd_main.c b/src/server/pbsd_main.c
index ada6eac82f..3c5fc9411c 100644
--- a/src/server/pbsd_main.c
+++ b/src/server/pbsd_main.c
@@ -140,6 +140,9 @@
 #include "ji_mutex.h"
 #include "job_route.h" /* queue_route */
 #include "exiting_jobs.h"
+#ifdef HELGRIND
+#include <helgrind.h>
+#endif
 
 #define TASK_CHECK_INTERVAL    10
 #define HELLO_WAIT_TIME        600
@@ -184,18 +187,18 @@ static void lock_out_ha();
 
 /* external data items */
 
-extern hello_container failures;
-extern int svr_chngNodesfile;
-extern int svr_totnodes;
-extern struct all_jobs alljobs;
-extern int run_change_logs;
-
+extern hello_container  failures;
+extern int              svr_chngNodesfile;
+extern int              svr_totnodes;
+extern struct all_jobs  alljobs;
+extern int              run_change_logs;
+extern time_t           pbs_tcp_timeout;
 extern pthread_mutex_t *poll_job_task_mutex;
-extern int max_poll_job_tasks;
+extern int              max_poll_job_tasks;
 
 /* External Functions */
 
-extern int recov_svr_attr (int);
+extern int    recov_svr_attr (int);
 extern void   change_logs_handler(int);
 extern void   change_logs();
 
@@ -1651,6 +1654,17 @@ int main(
   extern char *msg_svrdown; /* log message */
   extern char *msg_startup1; /* log message */
 
+#ifdef HELGRIND
+/* These global variables are written to from one or more threads, but
+ * read from many threads. This is technically a data race, but they
+ * should be benign.
Adding mutexes around their access would negatively + * impact performance. Tell the Helgrind tool to ignore these. + */ +VALGRIND_HG_DISABLE_CHECKING(&LOGLEVEL, sizeof LOGLEVEL); +VALGRIND_HG_DISABLE_CHECKING(&pbs_tcp_timeout, sizeof pbs_tcp_timeout); +VALGRIND_HG_DISABLE_CHECKING(&last_task_check_time, sizeof last_task_check_time); +#endif + ProgName = argv[0]; srand(get_random_number()); tzset(); /* localtime_r needs this */ From 8f53458ed35dfe5d06d8b6f6c5ec052516037f62 Mon Sep 17 00:00:00 2001 From: Matt Ezell Date: Sat, 8 Dec 2012 15:53:25 -0500 Subject: [PATCH 2/7] Don't unlock an already-unlocked queue req_selectjobs() calls build_selist(), which returns an unlocked queue. req_selectjobs() proceeds as if the queue is locked, and then tries to unlock the already-unlocked queue. Since req_selectjobs() is the only caller to build_selist(), have it return the queue locked. --- src/server/req_select.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/server/req_select.c b/src/server/req_select.c index 38cbc7ab28..2f15e9c2d7 100644 --- a/src/server/req_select.c +++ b/src/server/req_select.c @@ -304,6 +304,7 @@ int req_selectjobs( plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_select); + /* This will either leave pque NULL or return a locked queue */ rc = build_selist(plist, preq->rq_perm, &selistp, &pque, &bad); if (rc != 0) @@ -358,7 +359,7 @@ int req_selectjobs( } if (pque != NULL) - unlock_queue(pque, "req_selectjobs", NULL, LOGLEVEL); + unlock_queue(pque, __func__, NULL, LOGLEVEL); return PBSE_NONE; } /* END req_selectjobs() */ @@ -972,8 +973,6 @@ static int build_selist( if (*pque == (pbs_queue *)0) return (PBSE_UNKQUE); - - unlock_queue(*pque, __func__, NULL, LOGLEVEL); } } } From c8ebe784b98d72c768d4b9f48b13a77ea6af57bd Mon Sep 17 00:00:00 2001 From: Matt Ezell Date: Sat, 8 Dec 2012 16:11:43 -0500 Subject: [PATCH 3/7] Remove unnecessary locking/unlocking in send_job_work() Previously, this lock was protecting the line: sock = connection[con].ch_socket; but sock was unused and the line was removed. No need to keep the mutex lock/unlock in place. --- src/server/svr_movejob.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/server/svr_movejob.c b/src/server/svr_movejob.c index 19b95685d2..d023d6f6df 100644 --- a/src/server/svr_movejob.c +++ b/src/server/svr_movejob.c @@ -867,9 +867,6 @@ int send_job_work( return(PBSE_SYSTEM); } - pthread_mutex_lock(connection[con].ch_mutex); - pthread_mutex_unlock(connection[con].ch_mutex); - if (attempt_to_queue_job == TRUE) { if (change_substate_on_attempt_to_queue == TRUE) From 8332f778cfed8d14c2aed7adc1ce4e8ce6dbe07d Mon Sep 17 00:00:00 2001 From: Matt Ezell Date: Sat, 8 Dec 2012 19:38:52 -0500 Subject: [PATCH 4/7] Quiet thread lock order violations for tasks The proper lock order for tasks is to acquire the alltasks mutex and then the mutex for the specific task. When a task is created, its mutex is locked. Then, trying to add it to the proper task list acquires the alltasks mutex, which is an ordering violation. This can't cause a deadlock, because nobody else would be waiting on that task's mutex. Move the acquisition of the task mutex to the functions that actually do the insertion. Now, these functions expect the task unlocked and return it locked. Also, there are a couple "trylock" situations that violate lock order, but fall back to correct behavior if they cannot acquire the lock. Ignore the trylocks when using Helgrind. 
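For readers unfamiliar with the pattern, the sketch below is an editorial
illustration only: 'container_mutex' and 'item_mutex' are made-up stand-ins
for alltasks_mutex and a task's wt_mutex, and the function is not code from
this patch. It shows the trylock fallback being described: the canonical
order is container first, then item, and a failed trylock is resolved by
dropping the item lock and re-acquiring both in that order. The
'#ifndef HELGRIND' guard mirrors what the diff below does.

#include <pthread.h>

/* Illustrative only: canonical lock order is container, then item. */
static void join_container_while_holding_item(

  pthread_mutex_t *container_mutex,
  pthread_mutex_t *item_mutex)   /* already held by the caller */

  {
#ifndef HELGRIND
  if (pthread_mutex_trylock(container_mutex) != 0)
#endif
    {
    /* Could not take the container lock opportunistically (or we are
     * building for Helgrind): back off and re-acquire both locks in
     * the canonical container-then-item order. */
    pthread_mutex_unlock(item_mutex);
    pthread_mutex_lock(container_mutex);
    pthread_mutex_lock(item_mutex);
    }

  /* ... both locks are held here; do the insertion ... */

  pthread_mutex_unlock(container_mutex);
  }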
---
 src/server/svr_task.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/server/svr_task.c b/src/server/svr_task.c
index feda07b8b0..82afb3cd4e 100644
--- a/src/server/svr_task.c
+++ b/src/server/svr_task.c
@@ -115,6 +115,12 @@
 extern task_recycler tr;
 
+/*
+ * for a time-ordered work-task list, find the existing work
+ * task that comes chronologically AFTER the new work task
+ *
+ * The new work task should NOT be locked when calling this
+ */
 
 struct work_task *find_insertion_point(
 
   all_tasks *at,
@@ -179,7 +185,6 @@ struct work_task *set_task(
     }
 
   pthread_mutex_init(pnew->wt_mutex,NULL);
-  pthread_mutex_lock(pnew->wt_mutex);
 
   if (type == WORK_Timed)
     {
@@ -361,6 +366,8 @@ work_task *next_task(
 
 /*
  * adds a task to the specified array
+ * expects the work task to be unlocked upon entry
+ * returns it locked
  */
 
 int insert_task(
@@ -379,6 +386,7 @@ int insert_task(
     log_err(rc, __func__, "Cannot allocate space to resize the array");
     }
 
+  pthread_mutex_lock(wt->wt_mutex);
   wt->wt_tasklist = at;
 
   pthread_mutex_unlock(at->alltasks_mutex);
@@ -389,10 +397,11 @@ int insert_task(
 
 
 /*
- * NOTE: we currently don't unlock before in the trylock
- * because this is only called in set_task, and before is
- * always a new task not in the structure yet
  * adds a task to the array after another
+ *
+ * expects the 'after' work task to be locked
+ * expects the 'before' work task to be unlocked
+ * returns the 'before' task locked
  */
 
 int insert_task_before(
@@ -405,7 +414,15 @@ int insert_task_before(
   int            rc;
   int            i;
+/*
+ * NOTE: the trylock below introduces a lock order violation.
+ * Since we are inserting a new task, we know nobody else
+ * can be holding it; deadlock cannot occur. Helgrind doesn't
+ * understand this, so use "correct" locking under Helgrind
+ */
+#ifndef HELGRIND
   if (pthread_mutex_trylock(at->alltasks_mutex))
+#endif
     {
     pthread_mutex_unlock(after->wt_mutex);
     pthread_mutex_lock(at->alltasks_mutex);
     }
@@ -436,6 +453,7 @@ int insert_task_before(
     rc = PBSE_NONE;
     }
 
+  pthread_mutex_lock(before->wt_mutex);
   pthread_mutex_unlock(at->alltasks_mutex);
 
   before->wt_tasklist = at;
@@ -447,6 +465,11 @@ int insert_task_before(
 
 
+/*
+ * inserts a task at the beginning
+ * expects the work task to be unlocked
+ * returns it locked
+ */
 
 int insert_task_first(
 
   all_tasks *at,
@@ -463,6 +486,7 @@ int insert_task_first(
     log_err(rc, __func__, "Cannot allocate space to resize the array");
     }
 
+  pthread_mutex_lock(wt->wt_mutex);
   wt->wt_tasklist = at;
 
   pthread_mutex_unlock(at->alltasks_mutex);
@@ -511,7 +535,15 @@ int remove_task(
   {
   int rc = PBSE_NONE;
 
+/*
+ * NOTE: the trylock below is a lock order violation.
+ * If the trylock fails, it falls back to "correct" ordering,
+ * so deadlock cannot occur. Helgrind doesn't understand
+ * this, so always use "correct" ordering when using Helgrind
+ */
+#ifndef HELGRIND
   if (pthread_mutex_trylock(at->alltasks_mutex))
+#endif
     {
     pthread_mutex_unlock(wt->wt_mutex);
     pthread_mutex_lock(at->alltasks_mutex);

From 4c7250300ef0791291523c0333d60c7a132ca679 Mon Sep 17 00:00:00 2001
From: Matt Ezell
Date: Sat, 8 Dec 2012 20:16:03 -0500
Subject: [PATCH 5/7] Fix lock_startup(), lock_conn_table(), and lock_ss() to actually lock

The functions lock_startup(), lock_conn_table(), and lock_ss() only took
their lock on the very first call, while the lock table was still being
initialized; every subsequent call returned without locking anything.
This led to potential data races and to unlocking mutexes that were
never locked.
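To make the bug concrete, the old wrappers all shared the shape sketched
below (schematic, editorial illustration only -- not the verbatim code, and
'lock_example' is a made-up name): the pthread_mutex_lock() call is nested
under the locks == NULL check, so it can only run on the call that
initializes the lock table, while the matching unlock_*() functions unlock
unconditionally.

int lock_example()

  {
  int rc = PBSE_NONE;

  if (locks == NULL)                 /* true only on the very first call */
    {
    if ((rc = lock_init()) == PBSE_NONE)
      {
      if (pthread_mutex_lock(locks->startup) != 0)   /* reached at most once */
        return(PBSE_MUTEX);
      }
    }

  /* Every later call falls through to here without taking the lock,
   * yet unlock_startup() will still call pthread_mutex_unlock(). */
  return(rc);
  }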
--- src/lib/Libutils/u_lock_ctl.c | 58 ++++++++++++++++------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/src/lib/Libutils/u_lock_ctl.c b/src/lib/Libutils/u_lock_ctl.c index f9300b68e8..e14d7c3416 100644 --- a/src/lib/Libutils/u_lock_ctl.c +++ b/src/lib/Libutils/u_lock_ctl.c @@ -78,16 +78,15 @@ int lock_startup() { int rc = PBSE_NONE; - if (locks == NULL) + if ((locks == NULL) && ((rc = lock_init()) != PBSE_NONE)) { - if ((rc = lock_init()) == PBSE_NONE) - { - if (pthread_mutex_lock(locks->startup) != 0) - { - log_err(-1,"mutex_lock","ALERT: cannot lock startup mutex!\n"); - return(PBSE_MUTEX); - } - } + log_err(-1, "mutex_lock", "ALERT: cannot initialize mutexes!\n"); + return(PBSE_MUTEX); + } + if (pthread_mutex_lock(locks->startup) != 0) + { + log_err(-1, "mutex_lock", "ALERT: cannot lock startup mutex!\n"); + return(PBSE_MUTEX); } return(rc); @@ -101,7 +100,7 @@ int unlock_startup() { if (pthread_mutex_unlock(locks->startup) != 0) { - log_err(-1,"mutex_unlock","ALERT: cannot unlock startup mutex!\n"); + log_err(-1, "mutex_unlock", "ALERT: cannot unlock startup mutex!\n"); return(PBSE_MUTEX); } @@ -116,18 +115,15 @@ int lock_conn_table() { int rc = PBSE_NONE; - if (locks == NULL) + if ((locks == NULL) && ((rc = lock_init()) != PBSE_NONE)) { - if ((rc = lock_init()) == PBSE_NONE) - { - lock_init(); - - if (pthread_mutex_lock(locks->conn_table) != 0) - { - log_err(-1,"mutex_lock","ALERT: cannot lock conn_table mutex!\n"); - return(PBSE_MUTEX); - } - } + log_err(-1, "mutex_lock", "ALERT: cannot initialize mutexes!\n"); + return(PBSE_MUTEX); + } + if (pthread_mutex_lock(locks->conn_table) != 0) + { + log_err(-1, "mutex_lock", "ALERT: cannot lock conn_table mutex!\n"); + return(PBSE_MUTEX); } return(rc); @@ -155,17 +151,15 @@ int lock_ss() { int rc = PBSE_NONE; - if (locks == NULL) + if ((locks == NULL) && ((rc = lock_init()) != PBSE_NONE)) { - if ((rc = lock_init()) == PBSE_NONE) - { - - if (pthread_mutex_lock(locks->setup_save) != 0) - { - log_err(-1,"mutex_lock","ALERT: cannot lock setup_save mutex!\n"); - return(PBSE_MUTEX); - } - } + log_err(-1, "mutex_lock", "ALERT: cannot initialize mutexes!\n"); + return(PBSE_MUTEX); + } + if (pthread_mutex_lock(locks->setup_save) != 0) + { + log_err(-1, "mutex_lock", "ALERT: cannot lock setup_save mutex!\n"); + return(PBSE_MUTEX); } return(rc); @@ -178,7 +172,7 @@ int unlock_ss() { if (pthread_mutex_unlock(locks->setup_save) != 0) { - log_err(-1,"mutex_unlock","ALERT: cannot unlock setup_save mutex!\n"); + log_err(-1, "mutex_unlock", "ALERT: cannot unlock setup_save mutex!\n"); return(PBSE_MUTEX); } From 38f5d549e0f5f29ecd7ded6d42b508c8c2e7235f Mon Sep 17 00:00:00 2001 From: Matt Ezell Date: Sat, 8 Dec 2012 20:50:46 -0500 Subject: [PATCH 6/7] Observe correct lock order when creating queues Newly created queues are locked prior to being inserted into a global queue hash. This violates the lock order of allqueues => queue. Change so the proper order is observed. 
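The corrected flow boils down to the ordering sketched below (schematic,
editorial illustration only; the struct and function names are stand-ins and
do not exist in the patch): the new object is created unlocked, the insertion
routine takes the container lock first, then the object lock, and hands the
object back locked, so the allqueues => queue order always holds. This
mirrors what insert_queue() does in the diff that follows.

#include <pthread.h>

struct item  { pthread_mutex_t mtx; /* ... payload ... */ };
struct table { pthread_mutex_t mtx; /* ... storage ... */ };

/* The item must arrive unlocked and is returned to the caller locked. */
static void table_insert(

  struct table *t,
  struct item  *it)

  {
  pthread_mutex_lock(&t->mtx);    /* container lock first */
  /* ... add 'it' to the table's storage here ... */
  pthread_mutex_lock(&it->mtx);   /* then the item lock */
  pthread_mutex_unlock(&t->mtx);  /* container can be dropped now */
  }                               /* caller still holds it->mtx */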
---
 src/server/queue_func.c     | 11 ++++++++---
 src/server/queue_recycler.c |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/server/queue_func.c b/src/server/queue_func.c
index c3b37f1f84..f448bbf6aa 100644
--- a/src/server/queue_func.c
+++ b/src/server/queue_func.c
@@ -247,7 +247,6 @@ pbs_queue *que_alloc(
   initialize_all_jobs_array(pq->qu_jobs);
   initialize_all_jobs_array(pq->qu_jobs_array_sum);
   pthread_mutex_init(pq->qu_mutex,NULL);
-  lock_queue(pq, __func__, NULL, LOGLEVEL);
 
   snprintf(pq->qu_qs.qu_name, sizeof(pq->qu_qs.qu_name), "%s", name);
 
@@ -320,7 +319,7 @@ void que_free(
   remove_queue(&svr_queues, pq);
   pq->q_being_recycled = TRUE;
   insert_into_queue_recycler(pq);
-  unlock_queue(pq, "que_free", NULL, LOGLEVEL);
+  unlock_queue(pq, __func__, NULL, LOGLEVEL);
 
   return;
   }  /* END que_free() */
@@ -468,7 +467,12 @@ void free_alljobs_array(
 
 
 
-
+/*
+ * inserts a queue into an all_queues hash
+ *
+ * expects a queue that is unlocked
+ * returns the queue locked
+ */
 
 int insert_queue(
 
   all_queues *aq,
@@ -490,6 +494,7 @@ int insert_queue(
     rc = PBSE_NONE;
     }
 
+  lock_queue(pque, __func__, NULL, LOGLEVEL);
   pthread_mutex_unlock(aq->allques_mutex);
 
   return(rc);
diff --git a/src/server/queue_recycler.c b/src/server/queue_recycler.c
index 650dd09458..873866299d 100644
--- a/src/server/queue_recycler.c
+++ b/src/server/queue_recycler.c
@@ -100,6 +100,7 @@ int insert_into_queue_recycler(
     enqueue_threadpool_request(remove_some_recycle_queues,NULL);
     }
 
+  unlock_queue(pq, __func__, NULL, LOGLEVEL);
   rc = insert_queue(&q_recycler.queues,pq);
 
   update_queue_recycler_next_id();

From 2af22977c4a2b1a17a7bea4c9e1df1a3d76264e1 Mon Sep 17 00:00:00 2001
From: Matt Ezell
Date: Sat, 8 Dec 2012 22:08:13 -0500
Subject: [PATCH 7/7] Have Helgrind ignore pthread_mutex_trylock() lock order violations

There are several places where lock order is violated by calling
pthread_mutex_trylock(). Deadlock cannot occur, so have Helgrind
ignore these situations.
---
 src/server/array_func.c    |  6 ++++++
 src/server/job_container.c | 18 ++++++++++++++++++
 src/server/node_func.c     | 12 ++++++++++++
 src/server/queue_func.c    | 12 ++++++++++++
 4 files changed, 48 insertions(+)

diff --git a/src/server/array_func.c b/src/server/array_func.c
index f67b64adfa..2497e382e2 100644
--- a/src/server/array_func.c
+++ b/src/server/array_func.c
@@ -1846,7 +1846,13 @@ int remove_array(
   int rc;
   char arrayid[PBS_MAXSVRJOBID+1];
 
+/*
+ * Acquiring this lock would be a lock order violation, but
+ * deadlock cannot occur. Have Helgrind ignore this.
+ */
+#ifndef HELGRIND
   if (pthread_mutex_trylock(allarrays.allarrays_mutex))
+#endif
     {
     strcpy(arrayid, pa->ai_qs.parent_id);
 
diff --git a/src/server/job_container.c b/src/server/job_container.c
index fca7ab2a6c..e501f4ae04 100644
--- a/src/server/job_container.c
+++ b/src/server/job_container.c
@@ -665,7 +665,13 @@ int get_jobs_index(
   {
   int index;
 
+/*
+ * Acquiring this lock would be a lock order violation, but
+ * deadlock cannot occur. Have Helgrind ignore this.
+ */
+#ifndef HELGRIND
   if (pthread_mutex_trylock(aj->alljobs_mutex))
+#endif
     {
     unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
     pthread_mutex_lock(aj->alljobs_mutex);
@@ -703,7 +709,13 @@ int has_job(
 
   strcpy(jobid, pjob->ji_qs.ji_jobid);
 
+/*
+ * Acquiring this lock would be a lock order violation, but
+ * deadlock cannot occur. Have Helgrind ignore this.
+ */ +#ifndef HELGRIND if (pthread_mutex_trylock(aj->alljobs_mutex)) +#endif { unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); pthread_mutex_lock(aj->alljobs_mutex); @@ -750,7 +762,13 @@ int remove_job( if (LOGLEVEL >= 10) LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, pjob->ji_qs.ji_jobid); +/* + * Acquiring this lock would be a lock order violation, but + * deadlock cannot occur. Have Helgrind ignore this. + */ +#ifndef HELGRIND if (pthread_mutex_trylock(aj->alljobs_mutex)) +#endif { unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); pthread_mutex_lock(aj->alljobs_mutex); diff --git a/src/server/node_func.c b/src/server/node_func.c index 03dd8d4aae..cd37b28676 100644 --- a/src/server/node_func.c +++ b/src/server/node_func.c @@ -3214,7 +3214,13 @@ int remove_node( { int rc = PBSE_NONE; +/* + * Acquiring this lock would be a lock order violation, but + * deadlock cannot occur. Have Helgrind ignore this. + */ +#ifndef HELGRIND if (pthread_mutex_trylock(an->allnodes_mutex)) +#endif { unlock_node(pnode, __func__, NULL, LOGLEVEL); pthread_mutex_lock(an->allnodes_mutex); @@ -3241,7 +3247,13 @@ struct pbsnode *next_host( struct pbsnode *pnode; char *name = NULL; +/* + * Acquiring this lock would be a lock order violation, but + * deadlock cannot occur. Have Helgrind ignore this. + */ +#ifndef HELGRIND if (pthread_mutex_trylock(an->allnodes_mutex)) +#endif { if (held != NULL) { diff --git a/src/server/queue_func.c b/src/server/queue_func.c index f448bbf6aa..e4b33432f6 100644 --- a/src/server/queue_func.c +++ b/src/server/queue_func.c @@ -514,7 +514,13 @@ int remove_queue( int index; char log_buf[1000]; +/* + * Acquiring this lock would be a lock order violation, but + * deadlock cannot occur. Have Helgrind ignore this. + */ +#ifndef HELGRIND if (pthread_mutex_trylock(aq->allques_mutex)) +#endif { unlock_queue(pque, __func__, NULL, LOGLEVEL); pthread_mutex_lock(aq->allques_mutex); @@ -678,7 +684,13 @@ pbs_queue *lock_queue_with_job_held( if (pque != NULL) { +/* + * Acquiring this lock would be a lock order violation, but + * deadlock cannot occur. Have Helgrind ignore this. + */ +#ifndef HELGRIND if (pthread_mutex_trylock(pque->qu_mutex)) +#endif { /* if fail */ strcpy(jobid, pjob->ji_qs.ji_jobid);