diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java index ec009d358..16deffeaa 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java @@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli } File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory(); + if (localCacheDir == null) + { + throw new PipelineJobException("RemoteGenomeCacheDirectory was not set"); + } + if (isUpToDate(genome)) { log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId()); diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index ef374c097..f2e52eb19 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance) */ abstract public String getDockerCommand(); - /** - * This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. 
It can be set using DOCKER_USER in pipelineConfig.xml - */ - abstract public String getDockerUser(); - abstract public List getSequenceJobInputFiles(PipelineJob job); /** diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js index 81ea54519..0cd18ca62 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js @@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', { return; } - // If auto-loading, assume we want to read the URL - thePanel.down('#readUrlParams').setValue(true); - var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template')); if (recIdx > -1) { thePanel.down('labkey-combo').setValue(store.getAt(recIdx)); @@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', { helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. 
Checking this box will enable this.', fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset', labelWidth: 200 - },{ - xtype: 'checkbox', - itemId: 'readUrlParams', - helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.', - fieldLabel: 'Read Parameters From URL', - labelWidth: 200 }] }], buttons: [{ @@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', { delete json.submitJobToReadsetContainer; } - var readUrlParams = win.down('#readUrlParams').getValue(); - win.sequencePanel.applySavedValues(json, readUrlParams); + win.sequencePanel.applySavedValues(json, true); var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]'); if (submitJobToReadsetContainer) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index c30c64b95..8c9142869 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -457,18 +457,6 @@ public String getDockerCommand() return "docker"; } - @Override - public String getDockerUser() - { - String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER"); - if (StringUtils.trimToNull(val) != null) - { - return val; - } - - return null; - } - @Override public List getSequenceJobInputFiles(PipelineJob job) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index 9fc12cc93..70f711efa 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -39,6 +39,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler { @@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n try (SamReader reader = samReaderFactory.open(so.getFile())) { SAMFileHeader header = reader.getFileHeader().clone(); - int nSamples = reader.getFileHeader().getReadGroups().size(); - if (nSamples != 1) + List rgs = header.getReadGroups(); + Set distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet()); + if (distinctLibraries.size() > 1) { - throw new PipelineJobException("File has more than one read group, found: " + nSamples); + throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", "))); } - List rgs = header.getReadGroups(); - String existingSample = rgs.get(0).getSample(); - if (existingSample.equals(newRsName)) + Set distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet()); + if (distinctSamples.size() > 1) { - throw new PipelineJobException("Sample names match, aborting"); + throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", "))); + } + + if ( + distinctLibraries.stream().filter(x -> !newRsName.equals(x)).count() == 0L && + distinctSamples.stream().filter(x -> !newRsName.equals(x)).count() == 0L + ) + { + throw new PipelineJobException("Sample and library names match in read group(s), aborting"); } return header; @@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new List rgs = header.getReadGroups(); String existingSample = 
rgs.get(0).getSample(); - rgs.get(0).setSample(newRsName); + String existingLibrary = rgs.get(0).getLibrary(); + rgs.forEach(rg -> { + rg.setSample(newRsName); + rg.setLibrary(newRsName); + }); File headerBam = new File(ctx.getWorkingDirectory(), "header.bam"); try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam)) { } + + if (!headerBam.exists()) + { + throw new PipelineJobException("Expected header was not created: " + headerBam.getPath()); + } + ctx.getFileManager().addIntermediateFile(headerBam); ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam)); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 07636dfef..a6f4605a6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -1,6 +1,7 @@ package org.labkey.sequenceanalysis.run.alignment; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import org.apache.commons.io.FileUtils; @@ -30,6 +31,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; public class ParagraphStep extends AbstractParameterizedOutputHandler { @@ -141,16 +144,23 @@ else if (!svVcf.exists()) { throw new PipelineJobException("No read groups found in input BAM"); } - else if (header.getReadGroups().size() > 1) + + Set uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet()); + if (uniqueSamples.size() > 1) { - throw new PipelineJobException("More than one read group found in BAM"); + throw new PipelineJobException("Readgroups contained more than one unique 
sample"); } - rgId = header.getReadGroups().get(0).getSample(); + rgId = uniqueSamples.iterator().next(); JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset())); writer.println("id\tpath\tdepth\tread length"); double depth = json.getJSONObject("autosome").getDouble("depth"); + if (depth <= 0) + { + throw new PipelineJobException("Depth was zero for file: " + so.getFile().getPath()); + } + double readLength = json.getInt("read_length"); writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java index 4b845742d..b20715b57 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java @@ -22,7 +22,7 @@ */ public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep implements BamProcessingStep { - public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx) + public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger())); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java index 4b4957590..cbc1f0ead 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java @@ -10,6 +10,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import 
org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.util.PageFlowUtil; import org.labkey.sequenceanalysis.SequenceAnalysisModule; @@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan public MergeVcfsAndGenotypesHandler() { super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of( - ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "") + ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""), + ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. 
This is usually not necessary", "checkbox", null, false) )); } @@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz"); + boolean doSort = ctx.getParams().optBoolean("doSort", false); RecordedAction action = new RecordedAction(getName()); @@ -90,7 +93,16 @@ public void processFilesRemote(List inputFiles, JobContext c List inputVCFs = new ArrayList<>(); inputFiles.forEach(x -> inputVCFs.add(x.getFile())); - inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF")); + inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF")); ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next()); + if (doSort) + { + ctx.getLogger().info("Sorting input VCFs prior to merge"); + for (File f : inputVCFs) + { + SequencePipelineService.get().sortVcf(f, null, genome.getSequenceDictionary(), ctx.getLogger()); + } + } + new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);