From 4c2364cebe2e11eb9978b33dcf7f36f4450f3c4c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 18 Jun 2024 17:51:48 +0000
Subject: [PATCH 1/6] Bump ws from 7.5.9 to 7.5.10 in /jbrowse

Bumps [ws](https://github.com/websockets/ws) from 7.5.9 to 7.5.10.
- [Release notes](https://github.com/websockets/ws/releases)
- [Commits](https://github.com/websockets/ws/compare/7.5.9...7.5.10)

---
updated-dependencies:
- dependency-name: ws
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
---
 jbrowse/package-lock.json | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json
index 1bfd1235d..c9917d82f 100644
--- a/jbrowse/package-lock.json
+++ b/jbrowse/package-lock.json
@@ -18260,9 +18260,9 @@
       }
     },
     "node_modules/webpack-bundle-analyzer/node_modules/ws": {
-      "version": "7.5.9",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.9.tgz",
-      "integrity": "sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==",
+      "version": "7.5.10",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz",
+      "integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==",
       "dev": true,
       "engines": {
         "node": ">=8.3.0"
@@ -18740,9 +18740,9 @@
       }
     },
     "node_modules/ws": {
-      "version": "8.14.2",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
-      "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
+      "version": "8.17.1",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz",
+      "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==",
       "dev": true,
       "engines": {
         "node": ">=10.0.0"

From 0c4d74d54e2979eb6cc06ed51afa660f90e19029 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Sun, 14 Jul 2024 16:53:13 -0700
Subject: [PATCH 2/6] Allow UpdateReadsetFilesHandler to work on BAMs with multiple read groups

---
 .../analysis/UpdateReadsetFilesHandler.java | 36 ++++++++++++++-----
 .../AddOrReplaceReadGroupsStep.java         |  2 +-
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
index 9fc12cc93..70f711efa 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
@@ -39,6 +39,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler
 {
@@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n
         try (SamReader reader = samReaderFactory.open(so.getFile()))
         {
             SAMFileHeader header = reader.getFileHeader().clone();
-            int nSamples = reader.getFileHeader().getReadGroups().size();
-            if (nSamples != 1)
+            List<SAMReadGroupRecord> rgs = header.getReadGroups();
+            Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
+            if (distinctLibraries.size() > 1)
             {
-                throw new PipelineJobException("File has more than one read group, found: " + nSamples);
+                throw new PipelineJobException("File has more than one library in read group(s), found: " +
+                        distinctLibraries.stream().collect(Collectors.joining(", ")));
             }
 
-            List<SAMReadGroupRecord> rgs = header.getReadGroups();
-            String existingSample = rgs.get(0).getSample();
-            if (existingSample.equals(newRsName))
+            Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+            if (distinctSamples.size() > 1)
             {
-                throw new PipelineJobException("Sample names match, aborting");
+                throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
+            }
+
+            if (
+                    distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
+                    distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
+            )
+            {
+                throw new PipelineJobException("Sample and library names match in read group(s), aborting");
             }
 
             return header;
@@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
         List<SAMReadGroupRecord> rgs = header.getReadGroups();
         String existingSample = rgs.get(0).getSample();
-        rgs.get(0).setSample(newRsName);
+        String existingLibrary = rgs.get(0).getLibrary();
+        rgs.forEach(rg -> {
+            rg.setSample(newRsName);
+            rg.setLibrary(newRsName);
+        });
 
         File headerBam = new File(ctx.getWorkingDirectory(), "header.bam");
         try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
         {
 
         }
+
+        if (!headerBam.exists())
+        {
+            throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
+        }
+
         ctx.getFileManager().addIntermediateFile(headerBam);
         ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
index 4b845742d..b20715b57 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java
@@ -22,7 +22,7 @@
  */
 public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep implements BamProcessingStep
 {
-    public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
+    public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
     {
         super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger()));
     }

From 635cf9f2c0b173584d5c3f807708d1bd0373d9e1 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 06:54:06 -0700
Subject: [PATCH 3/6] Always read off URL for sequence pipeline page

---
 .../web/SequenceAnalysis/panel/BaseSequencePanel.js | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
index 81ea54519..0cd18ca62 100644
--- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
+++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js
@@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
                 return;
             }
 
-            // If auto-loading, assume we want to read the URL
-            thePanel.down('#readUrlParams').setValue(true);
-
             var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template'));
             if (recIdx > -1) {
                 thePanel.down('labkey-combo').setValue(store.getAt(recIdx));
@@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
                 helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
                 fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset',
                 labelWidth: 200
-            },{
-                xtype: 'checkbox',
-                itemId: 'readUrlParams',
-                helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.',
-                fieldLabel: 'Read Parameters From URL',
-                labelWidth: 200
             }]
         }],
         buttons: [{
@@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
             delete json.submitJobToReadsetContainer;
         }
 
-        var readUrlParams = win.down('#readUrlParams').getValue();
-        win.sequencePanel.applySavedValues(json, readUrlParams);
+        win.sequencePanel.applySavedValues(json, true);
 
         var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]');
         if (submitJobToReadsetContainer) {

From b471ef0b4146635ec9e27accd0e00364e046cc7d Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 13:02:37 -0700
Subject: [PATCH 4/6] Improve validation for paraGRAPH

---
 .../run/alignment/ParagraphStep.java | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
index 07636dfef..a6f4605a6 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
@@ -1,6 +1,7 @@
 package org.labkey.sequenceanalysis.run.alignment;
 
 import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
 import htsjdk.samtools.SamReader;
 import htsjdk.samtools.SamReaderFactory;
 import org.apache.commons.io.FileUtils;
@@ -30,6 +31,8 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 public class ParagraphStep extends AbstractParameterizedOutputHandler
 {
@@ -141,16 +144,23 @@ else if (!svVcf.exists())
             {
                 throw new PipelineJobException("No read groups found in input BAM");
             }
-            else if (header.getReadGroups().size() > 1)
+
+            Set<String> uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+            if (uniqueSamples.size() > 1)
             {
-                throw new PipelineJobException("More than one read group found in BAM");
+                throw new PipelineJobException("Readgroups contained more than one unique sample");
             }
 
-            rgId = header.getReadGroups().get(0).getSample();
+            rgId = uniqueSamples.iterator().next();
 
             JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
             writer.println("id\tpath\tdepth\tread length");
             double depth = json.getJSONObject("autosome").getDouble("depth");
+            if (depth <= 0)
+            {
+                throw new PipelineJobException("Depth was zero for file: " + so.getFile().getPath());
+            }
+
             double readLength = json.getInt("read_length");
             writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
         }

From 4611fc823fa915975fb6bf5f7314f2c95b8201ed Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 17 Jul 2024 20:59:19 -0700
Subject: [PATCH 5/6] Add option to sort VCFs prior to merge

---
 .../variant/MergeVcfsAndGenotypesHandler.java | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
index 4b4957590..cbc1f0ead 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java
@@ -10,6 +10,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.sequenceanalysis.SequenceAnalysisModule;
@@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan
     public MergeVcfsAndGenotypesHandler()
     {
         super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of(
-                ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "")
+                ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""),
+                ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. This is usually not necessary", "checkbox", null, false)
         ));
     }
@@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
     public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
     {
         File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz");
+        boolean doSort = ctx.getParams().optBoolean("doSort", false);
 
         RecordedAction action = new RecordedAction(getName());
@@ -90,7 +93,7 @@ public void processFilesRemote(List inputFiles, JobContext c
         List<File> inputVCFs = new ArrayList<>();
         inputFiles.forEach(x -> inputVCFs.add(x.getFile()));
-        inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF"));
+        inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF"));
 
         ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next());
         new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);
@@ -99,6 +102,15 @@ public void processFilesRemote(List inputFiles, JobContext c
             throw new PipelineJobException("unable to find output: " + outputVcf.getPath());
         }
 
+        if (doSort)
+        {
+            ctx.getLogger().info("Sorting VCFs");
+            for (File f : inputVCFs)
+            {
+                SequencePipelineService.get().sortVcf(f, null, genome.getSequenceDictionary(), ctx.getLogger());
+            }
+        }
+
         action.addOutput(outputVcf, "Combined VCF", false);
         SequenceOutputFile so = new SequenceOutputFile();
         so.setName(outputVcf.getName());

From d5232a2c7012f3c28b8407d998a988df00858d23 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Wed, 24 Jul 2024 11:36:41 -0700
Subject: [PATCH 6/6] Remove unused code

---
 .../pipeline/ReferenceGenomeManager.java  |  5 +++++
 .../pipeline/SequencePipelineService.java |  5 -----
 .../SequencePipelineServiceImpl.java      | 12 ------------
 3 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
index ec009d358..16deffeaa 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java
@@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
         }
 
         File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
+        if (localCacheDir == null)
+        {
+            throw new PipelineJobException("RemoteGenomeCacheDirectory was not set");
+        }
+
         if (isUpToDate(genome))
         {
             log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId());
diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
index ef374c097..f2e52eb19 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
@@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance)
      */
     abstract public String getDockerCommand();
 
-    /**
-     * This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. It can be set using DOCKER_USER in pipelineConfig.xml
-     */
-    abstract public String getDockerUser();
-
     abstract public List getSequenceJobInputFiles(PipelineJob job);
 
     /**
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
index c30c64b95..8c9142869 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
@@ -457,18 +457,6 @@ public String getDockerCommand()
         return "docker";
     }
 
-    @Override
-    public String getDockerUser()
-    {
-        String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER");
-        if (StringUtils.trimToNull(val) != null)
-        {
-            return val;
-        }
-
-        return null;
-    }
-
     @Override
     public List getSequenceJobInputFiles(PipelineJob job)
     {