From 4c2364cebe2e11eb9978b33dcf7f36f4450f3c4c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 18 Jun 2024 17:51:48 +0000
Subject: [PATCH 1/6] Bump ws from 7.5.9 to 7.5.10 in /jbrowse

Bumps [ws](https://github.com/websockets/ws) from 7.5.9 to 7.5.10.
- [Release notes](https://github.com/websockets/ws/releases)
- [Commits](https://github.com/websockets/ws/compare/7.5.9...7.5.10)

---
updated-dependencies:
- dependency-name: ws
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
---
 jbrowse/package-lock.json | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json
index 1bfd1235d..c9917d82f 100644
--- a/jbrowse/package-lock.json
+++ b/jbrowse/package-lock.json
@@ -18260,9 +18260,9 @@
       }
     },
     "node_modules/webpack-bundle-analyzer/node_modules/ws": {
-      "version": "7.5.9",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.9.tgz",
-      "integrity": "sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==",
+      "version": "7.5.10",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz",
+      "integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==",
       "dev": true,
       "engines": {
         "node": ">=8.3.0"
@@ -18740,9 +18740,9 @@
       }
     },
     "node_modules/ws": {
-      "version": "8.14.2",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
-      "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
+      "version": "8.17.1",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz",
+      "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==",
       "dev": true,
       "engines": {
         "node": ">=10.0.0"

From 0c4d74d54e2979eb6cc06ed51afa660f90e19029 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Sun, 14 Jul 2024 16:53:13 -0700
Subject: [PATCH 2/6] Allow UpdateReadsetFilesHandler to work on BAMs with multiple read groups

---
 .../analysis/UpdateReadsetFilesHandler.java | 36 ++++++++++++++-----
 .../AddOrReplaceReadGroupsStep.java         |  2 +-
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
index 9fc12cc93..70f711efa 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
@@ -39,6 +39,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler
 {
@@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n
         try (SamReader reader = samReaderFactory.open(so.getFile()))
         {
             SAMFileHeader header = reader.getFileHeader().clone();
-            int nSamples = reader.getFileHeader().getReadGroups().size();
-            if (nSamples != 1)
+            List<SAMReadGroupRecord> rgs = header.getReadGroups();
+            Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
+            if (distinctLibraries.size() > 1)
             {
-                throw new PipelineJobException("File has more than one read group, found: " + nSamples);
+                throw new PipelineJobException("File has more than one library in read group(s), found: " +
+                        distinctLibraries.stream().collect(Collectors.joining(", ")));
             }
 
-            List<SAMReadGroupRecord> rgs = header.getReadGroups();
-            String existingSample = rgs.get(0).getSample();
-            if (existingSample.equals(newRsName))
+            Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+            if (distinctSamples.size() > 1)
             {
-                throw new PipelineJobException("Sample names match, aborting");
+                throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
+            }
+
+            if (
+                    distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
+                    distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
+            )
+            {
+                throw new PipelineJobException("Sample and library names match in read group(s), aborting");
             }
 
             return header;
@@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
         List<SAMReadGroupRecord> rgs = header.getReadGroups();
         String existingSample = rgs.get(0).getSample();
-        rgs.get(0).setSample(newRsName);
+        String existingLibrary = rgs.get(0).getLibrary();
+        rgs.forEach(rg -> {
+            rg.setSample(newRsName);
+            rg.setLibrary(newRsName);
+        });
 
         File headerBam = new File(ctx.getWorkingDirectory(), "header.bam");
         try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
         {
 
         }
+
+        if (!headerBam.exists())
+        {
+            throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
+        }
+
         ctx.getFileManager().addIntermediateFile(headerBam);
         ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
index 4b845742d..b20715b57 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
@@ -22,7 +22,7 @@
  */
 public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep implements BamProcessingStep
 {
-    public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
+    public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
     {
         super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger()));
     }

From 635cf9f2c0b173584d5c3f807708d1bd0373d9e1 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 06:54:06 -0700
Subject: [PATCH 3/6] Always read off URL for sequence pipeline page

---
 .../web/SequenceAnalysis/panel/BaseSequencePanel.js | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
index 81ea54519..0cd18ca62 100644
--- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
+++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
@@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
                 return;
             }
 
-            // If auto-loading, assume we want to read the URL
-            thePanel.down('#readUrlParams').setValue(true);
-
             var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template'));
             if (recIdx > -1) {
                 thePanel.down('labkey-combo').setValue(store.getAt(recIdx));
@@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
                 helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
                 fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset',
                 labelWidth: 200
-            },{
-                xtype: 'checkbox',
-                itemId: 'readUrlParams',
-                helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.',
-                fieldLabel: 'Read Parameters From URL',
-                labelWidth: 200
             }]
         }],
         buttons: [{
@@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
             delete json.submitJobToReadsetContainer;
         }
 
-        var readUrlParams = win.down('#readUrlParams').getValue();
-        win.sequencePanel.applySavedValues(json, readUrlParams);
+        win.sequencePanel.applySavedValues(json, true);
 
         var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]');
         if (submitJobToReadsetContainer) {

From b471ef0b4146635ec9e27accd0e00364e046cc7d Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 13:02:37 -0700
Subject: [PATCH 4/6] Improve validation for paraGRAPH

---
 .../run/alignment/ParagraphStep.java | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
index 07636dfef..a6f4605a6 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
@@ -1,6 +1,7 @@
 package org.labkey.sequenceanalysis.run.alignment;
 
 import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
 import htsjdk.samtools.SamReader;
 import htsjdk.samtools.SamReaderFactory;
 import org.apache.commons.io.FileUtils;
@@ -30,6 +31,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 public class ParagraphStep extends AbstractParameterizedOutputHandler
 {
@@ -141,16 +144,23 @@ else if (!svVcf.exists())
             {
                 throw new PipelineJobException("No read groups found in input BAM");
             }
-            else if (header.getReadGroups().size() > 1)
+
+            Set<String> uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+            if (uniqueSamples.size() > 1)
             {
-                throw new PipelineJobException("More than one read group found in BAM");
+                throw new PipelineJobException("Readgroups contained more than one unique sample");
             }
 
-            rgId = header.getReadGroups().get(0).getSample();
+            rgId = uniqueSamples.iterator().next();
 
             JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
             writer.println("id\tpath\tdepth\tread length");
             double depth = json.getJSONObject("autosome").getDouble("depth");
+            if (depth <= 0)
+            {
+                throw new PipelineJobException("Depth was zero for file: " + so.getFile().getPath());
+            }
+
             double readLength = json.getInt("read_length");
             writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
         }

From 4611fc823fa915975fb6bf5f7314f2c95b8201ed Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 20:59:19 -0700
Subject: [PATCH 5/6] Add option to sort VCFs prior to merge

---
 .../variant/MergeVcfsAndGenotypesHandler.java | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
index 4b4957590..cbc1f0ead 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
@@ -10,6 +10,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.sequenceanalysis.SequenceAnalysisModule;
@@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan
     public MergeVcfsAndGenotypesHandler()
     {
         super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of(
-                ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "")
+                ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""),
+                ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. This is usually not necessary", "checkbox", null, false)
         ));
     }
@@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
     public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
     {
         File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz");
+        boolean doSort = ctx.getParams().optBoolean("doSort", false);
 
         RecordedAction action = new RecordedAction(getName());
@@ -90,7 +93,7 @@ public void processFilesRemote(List inputFiles, JobContext c
         List<File> inputVCFs = new ArrayList<>();
         inputFiles.forEach(x -> inputVCFs.add(x.getFile()));
-        inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF"));
+        inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF"));
 
         ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next());
         new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);
@@ -99,6 +102,15 @@ public void processFilesRemote(List inputFiles, JobContext c
             throw new PipelineJobException("unable to find output: " + outputVcf.getPath());
         }
 
+        if (doSort)
+        {
+            ctx.getLogger().info("Sorting VCFs");
+            for (File f : inputVCFs)
+            {
+                SequencePipelineService.get().sortVcf(f, null, genome.getSequenceDictionary(), ctx.getLogger());
+            }
+        }
+
         action.addOutput(outputVcf, "Combined VCF", false);
         SequenceOutputFile so = new SequenceOutputFile();
         so.setName(outputVcf.getName());

From d5232a2c7012f3c28b8407d998a988df00858d23 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 24 Jul 2024 11:36:41 -0700
Subject: [PATCH 6/6] Remove unused code

---
 .../pipeline/ReferenceGenomeManager.java  |  5 +++++
 .../pipeline/SequencePipelineService.java |  5 -----
 .../SequencePipelineServiceImpl.java      | 12 ------------
 3 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
index ec009d358..16deffeaa 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
@@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
         }
 
         File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
+        if (localCacheDir == null)
+        {
+            throw new PipelineJobException("RemoteGenomeCacheDirectory was not set");
+        }
+
         if (isUpToDate(genome))
         {
             log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId());
diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
index ef374c097..f2e52eb19 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
@@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance)
      */
     abstract public String getDockerCommand();
 
-    /**
-     * This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. It can be set using DOCKER_USER in pipelineConfig.xml
-     */
-    abstract public String getDockerUser();
-
     abstract public List getSequenceJobInputFiles(PipelineJob job);
 
     /**
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
index c30c64b95..8c9142869 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
@@ -457,18 +457,6 @@ public String getDockerCommand()
         return "docker";
     }
 
-    @Override
-    public String getDockerUser()
-    {
-        String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER");
-        if (StringUtils.trimToNull(val) != null)
-        {
-            return val;
-        }
-
-        return null;
-    }
-
     @Override
     public List getSequenceJobInputFiles(PipelineJob job)
     {