@@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
}

File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
+if (localCacheDir == null)
+{
+throw new PipelineJobException("RemoteGenomeCacheDirectory was not set");
+}
+
if (isUpToDate(genome))
{
log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId());
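Note: the new guard turns a missing cache directory into an explicit configuration error instead of a NullPointerException later in the rsync step. A minimal standalone sketch of the same fail-fast pattern (the class and environment variable below are illustrative stand-ins, not the LabKey API):

    import java.io.File;

    public class CacheDirGuard
    {
        // Hypothetical stand-in for SequencePipelineService.get().getRemoteGenomeCacheDirectory()
        static File getRemoteGenomeCacheDirectory()
        {
            String dir = System.getenv("REMOTE_GENOME_CACHE_DIR");
            return dir == null ? null : new File(dir);
        }

        public static void main(String[] args)
        {
            File localCacheDir = getRemoteGenomeCacheDirectory();
            if (localCacheDir == null)
            {
                // Fail fast with a descriptive message rather than an NPE downstream
                throw new IllegalStateException("RemoteGenomeCacheDirectory was not set");
            }
            System.out.println("Caching genomes under: " + localCacheDir.getAbsolutePath());
        }
    }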
@@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance)
*/
abstract public String getDockerCommand();

-/**
-* This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. It can be set using DOCKER_USER in pipelineConfig.xml
-*/
-abstract public String getDockerUser();
-
abstract public List<File> getSequenceJobInputFiles(PipelineJob job);

/**
@@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
return;
}

-// If auto-loading, assume we want to read the URL
-thePanel.down('#readUrlParams').setValue(true);
-
var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template'));
if (recIdx > -1) {
thePanel.down('labkey-combo').setValue(store.getAt(recIdx));
@@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
helpPopup: 'By default, the pipeline jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook of each input. Checking this box enables that behavior.',
fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset',
labelWidth: 200
-},{
-xtype: 'checkbox',
-itemId: 'readUrlParams',
-helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.',
-fieldLabel: 'Read Parameters From URL',
-labelWidth: 200
}]
}],
buttons: [{
@@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
delete json.submitJobToReadsetContainer;
}

-var readUrlParams = win.down('#readUrlParams').getValue();
-win.sequencePanel.applySavedValues(json, readUrlParams);
+win.sequencePanel.applySavedValues(json, true);

var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]');
if (submitJobToReadsetContainer) {
@@ -457,18 +457,6 @@ public String getDockerCommand()
return "docker";
}

-@Override
-public String getDockerUser()
-{
-String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER");
-if (StringUtils.trimToNull(val) != null)
-{
-return val;
-}
-
-return null;
-}
-
@Override
public List<File> getSequenceJobInputFiles(PipelineJob job)
{
@@ -39,6 +39,8 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;

public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
{
@@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n
try (SamReader reader = samReaderFactory.open(so.getFile()))
{
SAMFileHeader header = reader.getFileHeader().clone();
-int nSamples = reader.getFileHeader().getReadGroups().size();
-if (nSamples != 1)
+List<SAMReadGroupRecord> rgs = header.getReadGroups();
+Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
+if (distinctLibraries.size() > 1)
{
-throw new PipelineJobException("File has more than one read group, found: " + nSamples);
+throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", ")));
}

-List<SAMReadGroupRecord> rgs = header.getReadGroups();
-String existingSample = rgs.get(0).getSample();
-if (existingSample.equals(newRsName))
+Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+if (distinctSamples.size() > 1)
{
-throw new PipelineJobException("Sample names match, aborting");
+throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
}

+if (
+distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
+distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
+)
+{
+throw new PipelineJobException("Sample and library names match in read group(s), aborting");
+}
+
return header;
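Note: the validation now tolerates multiple read groups as long as they agree on a single library and a single sample. A self-contained htsjdk sketch of the same checks, using an in-memory header for illustration:

    import htsjdk.samtools.SAMFileHeader;
    import htsjdk.samtools.SAMReadGroupRecord;

    import java.util.Set;
    import java.util.stream.Collectors;

    public class ReadGroupValidationDemo
    {
        public static void main(String[] args)
        {
            SAMFileHeader header = new SAMFileHeader();
            for (String id : new String[]{"rg1", "rg2"})
            {
                SAMReadGroupRecord rg = new SAMReadGroupRecord(id);
                rg.setSample("sample1");  // both read groups share one sample
                rg.setLibrary("lib1");    // and one library
                header.addReadGroup(rg);
            }

            Set<String> distinctLibraries = header.getReadGroups().stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
            Set<String> distinctSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());

            // Two read groups, but one library and one sample, so validation passes
            System.out.println(distinctLibraries.size() == 1 && distinctSamples.size() == 1);
        }
    }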
@@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new

List<SAMReadGroupRecord> rgs = header.getReadGroups();
String existingSample = rgs.get(0).getSample();
-rgs.get(0).setSample(newRsName);
+String existingLibrary = rgs.get(0).getLibrary();
+rgs.forEach(rg -> {
+rg.setSample(newRsName);
+rg.setLibrary(newRsName);
+});

File headerBam = new File(ctx.getWorkingDirectory(), "header.bam");
try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
{

}
+
+if (!headerBam.exists())
+{
+throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
+}
+
ctx.getFileManager().addIntermediateFile(headerBam);
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));

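Note: the empty try-with-resources body above is intentional: opening and immediately closing the writer emits a BAM containing only the rewritten header, with zero reads, which the remainder of reheaderBamOrCram (truncated here) can then apply to the original file. A sketch of that flow in isolation (paths and the new name are placeholders):

    import htsjdk.samtools.SAMFileHeader;
    import htsjdk.samtools.SAMFileWriter;
    import htsjdk.samtools.SAMFileWriterFactory;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;

    import java.io.File;
    import java.io.IOException;

    public class WriteHeaderOnlyBam
    {
        public static void main(String[] args) throws IOException
        {
            File inputBam = new File(args[0]);   // placeholder input
            File headerBam = new File(args[1]);  // placeholder output

            SAMFileHeader header;
            try (SamReader reader = SamReaderFactory.makeDefault().open(inputBam))
            {
                header = reader.getFileHeader().clone();
            }

            // Rename sample and library on every read group
            header.getReadGroups().forEach(rg -> {
                rg.setSample("newSampleName");
                rg.setLibrary("newSampleName");
            });

            // Intentionally empty body: close() flushes a header-only BAM
            try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
            {
            }
        }
    }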
@@ -1,6 +1,7 @@
package org.labkey.sequenceanalysis.run.alignment;

import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.apache.commons.io.FileUtils;
@@ -30,6 +31,8 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;

public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
{
@@ -141,16 +144,23 @@ else if (!svVcf.exists())
{
throw new PipelineJobException("No read groups found in input BAM");
}
-else if (header.getReadGroups().size() > 1)
+
+Set<String> uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
+if (uniqueSamples.size() > 1)
{
-throw new PipelineJobException("More than one read group found in BAM");
+throw new PipelineJobException("Read groups contained more than one unique sample");
}

-rgId = header.getReadGroups().get(0).getSample();
+rgId = uniqueSamples.iterator().next();

JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
writer.println("id\tpath\tdepth\tread length");
double depth = json.getJSONObject("autosome").getDouble("depth");
+if (depth <= 0)
+{
+throw new PipelineJobException("Depth was zero or negative for file: " + so.getFile().getPath());
+}
+
double readLength = json.getInt("read_length");
writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
}
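Note: the new depth check rejects a malformed or empty coverage file before the manifest row is written. A sketch of the JSON handling, assuming the coverage file has the shape the code above expects ({"autosome": {"depth": ...}, "read_length": ...}); the path and sample id are placeholders:

    import org.apache.commons.io.FileUtils;
    import org.json.JSONObject;

    import java.io.File;
    import java.io.IOException;
    import java.nio.charset.Charset;

    public class CoverageManifestDemo
    {
        public static void main(String[] args) throws IOException
        {
            File coverageJson = new File(args[0]);  // placeholder path to the coverage JSON

            JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
            double depth = json.getJSONObject("autosome").getDouble("depth");
            if (depth <= 0)
            {
                throw new IllegalStateException("Depth was zero or negative: " + depth);
            }

            double readLength = json.getInt("read_length");
            // Tab-delimited manifest row: id, in-container path, depth, read length
            System.out.println("sample1" + "\t" + "/work/input.bam" + "\t" + depth + "\t" + readLength);
        }
    }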
@@ -22,7 +22,7 @@
*/
public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep<AddOrReplaceReadGroupsWrapper> implements BamProcessingStep
{
-public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
+public AddOrReplaceReadGroupsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger()));
}
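Note: parameterizing the constructor argument as PipelineStepProvider<?> removes a raw-type (unchecked) warning without changing behavior. A compact illustration with hypothetical types:

    import java.util.List;

    public class WildcardProviderDemo
    {
        interface Provider<T> { List<T> steps(); }

        // Raw type: compiles, but with an unchecked warning
        static void rawArg(Provider provider) { }

        // Unbounded wildcard: accepts any Provider, no warning
        static void wildcardArg(Provider<?> provider) { }

        public static void main(String[] args)
        {
            Provider<String> p = List::of;
            rawArg(p);
            wildcardArg(p);
        }
    }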
@@ -10,6 +10,7 @@
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
@@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan
public MergeVcfsAndGenotypesHandler()
{
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of(
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "")
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""),
ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. This is usually not necessary", "checkbox", null, false)
));
}

@@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
{
File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz");
+boolean doSort = ctx.getParams().optBoolean("doSort", false);

RecordedAction action = new RecordedAction(getName());

@@ -90,7 +93,17 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c

List<File> inputVCFs = new ArrayList<>();
inputFiles.forEach(x -> inputVCFs.add(x.getFile()));
inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF"));
inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF"));

ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next());
new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);
@@ -99,6 +112,6 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
throw new PipelineJobException("unable to find output: " + outputVcf.getPath());
}

action.addOutput(outputVcf, "Combined VCF", false);
SequenceOutputFile so = new SequenceOutputFile();
so.setName(outputVcf.getName());
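Note: the new checkbox arrives in the job as a JSON parameter; optBoolean supplies the default when the key is absent, so previously saved templates keep working. A minimal sketch of that lookup (the params object is hand-built here, standing in for ctx.getParams()):

    import org.json.JSONObject;

    public class DoSortParamDemo
    {
        public static void main(String[] args)
        {
            JSONObject params = new JSONObject();  // stand-in for ctx.getParams()
            params.put("basename", "merged");

            // Missing key falls back to the supplied default
            boolean doSort = params.optBoolean("doSort", false);
            System.out.println("doSort = " + doSort);  // false
        }
    }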