Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3270659
Show cellranger version
bbimber Aug 2, 2024
9669c74
Improve handling of TRA-DV lineages
bbimber Aug 4, 2024
960c48b
Move getLocationForCachedInputs to SequenceJob
bbimber Aug 4, 2024
326265a
Allow alignment jobs to re-download archived SRA data
bbimber Aug 4, 2024
8700018
Bugfix to handling of TRAV/DV segments
bbimber Aug 4, 2024
2742759
Bugfix when doSraDownloadIfNeeded not provided
bbimber Aug 4, 2024
a57d9a3
Primers should not be required for CellRangerVDJWrapper
bbimber Aug 4, 2024
d8e4752
Primers are required for CellRangerVDJWrapper
bbimber Aug 4, 2024
2fd324b
Allow client-side to accept SRA archived data
bbimber Aug 4, 2024
0df977e
Set job status in SequenceAlignmentTask
bbimber Aug 5, 2024
bcaf6bf
Improve cellranger description fields
bbimber Aug 5, 2024
67f77b7
Bugfix to cached ReadData
bbimber Aug 5, 2024
e285a64
Ensure SRA data not deleted until end of job
bbimber Aug 7, 2024
6545d95
Update AppendCiteSeq for newer versions
bbimber Aug 7, 2024
68f6af8
Support minAllowableDoubletRateFilter
bbimber Aug 7, 2024
098bb39
Do more cleanup in CellRangerVDJWrapper
bbimber Aug 7, 2024
69479e9
Bugfix SRA download
bbimber Aug 8, 2024
ab24cc3
Bugfix to cell hashing command
bbimber Aug 8, 2024
e90c143
Push RIRA logic into R package
bbimber Aug 9, 2024
18013c8
Bugfix to MergeBamAlignment
bbimber Aug 10, 2024
1280c0a
More verbose logging in CellRangerVDJWrapper
bbimber Aug 13, 2024
ffd2f1d
Skip duplicated SRA accessions when re-downloading
bbimber Aug 14, 2024
82c7405
Update defaults for CellRangerGexCountStep
bbimber Aug 16, 2024
075a2ee
Allow seurat pipeline to be kicked off against 10x Run Summaries
bbimber Aug 16, 2024
0b52dce
Bugfix to UpdateReadsetFilesHandler
bbimber Aug 20, 2024
324cf7e
Bugfix to AppendCiteSeq
bbimber Aug 20, 2024
b29dfe7
Bugfix to UpdateReadsetFilesHandler
bbimber Aug 20, 2024
82797da
Bugfix to CellRangerVDJWrapper
bbimber Aug 24, 2024
7008b39
Bugfix to CellRangerVDJWrapper
bbimber Aug 25, 2024
4b3375e
Allow CellRanger to more easily skip BAM creation
bbimber Aug 25, 2024
760c35d
Allow CellRangerVDJ to more easily skip BAM creation
bbimber Aug 25, 2024
f5e1eb7
Second fix for alignments that don't produce BAMs
bbimber Aug 25, 2024
3a6b43e
Fix for alignments that don't produce BAMs
bbimber Aug 26, 2024
340ee5a
Ensure cellranger-dependent steps retain BAM
bbimber Aug 26, 2024
beb1bac
Bugfix to BAM-less jobs
bbimber Aug 26, 2024
d11cec0
Update CellRanger metrics for BAM-less jobs
bbimber Aug 26, 2024
cb345c1
Bugfix to FeaturePlots.R when tsne not present
bbimber Aug 26, 2024
5330687
Add validation to Cell Ranger steps
bbimber Aug 26, 2024
93ec686
Change order of steps when no BAM present
bbimber Aug 26, 2024
0365054
Reduce log level when BAM is discarded
bbimber Aug 26, 2024
e7b6133
Walk back create-bam arg in CellRangerVDJWrapper
bbimber Aug 28, 2024
fad7e62
Merge discvr-24.7 to develop
bbimber Aug 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', {
title: 'Add Steps',
border: false,
width: 800,
autoScroll: true,
maxHeight: '90%',
items: items,
buttons: [{
text: 'Done',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
containerPath: this.queryContainer,
schemaName: 'sequenceanalysis',
queryName: 'readdata',
columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists',
columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists,sra_accession',
metadata: {
queryContainerPath: {
createIfDoesNotExist: true,
Expand All @@ -160,11 +160,17 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
load: function (store) {
var errors = [];
var errorNames = [];
var archived = [];
store.each(function(rec){
if (rec.get('fileid1')){
if (!rec.get('fileid1/fileexists')){
errors.push(rec);
errorNames.push(rec.get('readset/name'));
if (!rec.get('sra_accession')) {
errors.push(rec);
errorNames.push(rec.get('readset/name'));
}
else {
archived.push(rec.get('readset/name'))
}
}
else {
this.fileIds.push(rec.get('fileid1'));
Expand All @@ -178,8 +184,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

if (rec.get('fileid2')){
if (!rec.get('fileid2/fileexists')){
errors.push(rec);
errorNames.push(rec.get('name'))
if (!rec.get('sra_accession')) {
errors.push(rec);
errorNames.push(rec.get('name'))
}
else {
archived.push(rec.get('name'));
}
}
else {
this.fileIds.push(rec.get('fileid2'));
Expand All @@ -188,7 +199,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
}
}, this);

this.onStoreLoad(errorNames);
this.onStoreLoad(errorNames, archived);

var target = this.down('#readsetCount');
if (target) {
Expand All @@ -201,13 +212,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

storesLoaded: 0,
errorNames: [],
archivedNames: [],

onStoreLoad: function(errorNames){
onStoreLoad: function(errorNames, archivedNames){
this.storesLoaded++;
if (errorNames){
this.errorNames = this.errorNames.concat(errorNames);
this.errorNames = Ext4.unique(this.errorNames);
}

if (archivedNames) {
this.archivedNames = Ext4.unique(this.archivedNames.concat(archivedNames));
}
if (this.storesLoaded === 2){
this.afterStoreLoad();
}
Expand All @@ -225,7 +241,10 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
dv.refresh();

if (this.errorNames.length){
alert('The follow readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
alert('The following readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
}
else if (this.archivedNames.length) {
Ext4.Msg.alert('Warning', 'One or more readsets contains SRA archived data. Please choose the option to auto-download these data');
}
},

Expand Down Expand Up @@ -326,6 +345,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
uncheckedValue: false,
checked: false,
xtype: 'checkbox'
},{
fieldLabel: 'Restore SRA Data If Needed',
helpPopup: 'If selected, any archived sequence data that contains an SRA accession will be re-downloaded to a temp location',
name: 'doSraDownloadIfNeeded',
inputValue: true,
uncheckedValue: false,
checked: true,
xtype: 'checkbox'
}, this.getSaveTemplateCfg(),{
fieldLabel: 'Submit Jobs To Same Folder/Workbook As Readset?',
helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.labkey.sequenceanalysis.analysis;

import com.google.common.io.Files;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
Expand All @@ -22,6 +23,7 @@
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
Expand Down Expand Up @@ -111,6 +113,10 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | Sequenc
{
getAndValidateHeaderForVcf(so, newRsName);
}
else
{
throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath());
}

ctx.getSequenceSupport().cacheObject("readsetId", newRsName);
}
Expand Down Expand Up @@ -207,6 +213,18 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
String existingSample = header.getGenotypeSamples().get(0);

File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt");
if (!sampleNamesFile.exists())
{
try
{
Files.touch(sampleNamesFile);
}
catch (IOException e)
{
throw new PipelineJobException(e);
}
}

try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND))
{
writer.println(newRsName);
Expand All @@ -225,11 +243,19 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
try
{
File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false);
FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
if (so.getFile().exists())
{
so.getFile().delete();
}
FileUtils.moveFile(outputVcf, so.getFile());

FileType gz = new FileType(".gz");
File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX);
FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING);
if (inputIndex.exists())
{
inputIndex.delete();
}
FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
Expand All @@ -243,6 +269,11 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new
{
File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt");
boolean preExisting = tracker.exists();
if (!preExisting)
{
Files.touch(tracker);
}

try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND))
{
if (!preExisting)
Expand Down Expand Up @@ -279,20 +310,36 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
}

ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
if (rg == null)
{
throw new PipelineJobException("Unable to find genome: " + so.getLibrary_id());
}

ctx.getFileManager().addIntermediateFile(headerBam);
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));

File output = new File(ctx.getWorkingDirectory(), so.getFile().getName());
new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam);
new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam, rg);
if (!output.exists())
{
throw new PipelineJobException("Missing file: " + output.getPath());
}

File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false);

FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING);
if (so.getFile().exists())
{
so.getFile().delete();
}
FileUtils.moveFile(output, so.getFile());

File inputIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile());
if (inputIndex.exists())
{
inputIndex.delete();
}
FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
Expand All @@ -315,7 +362,7 @@ protected String getToolName()
return "ReplaceSamHeader";
}

public void execute(File input, File output, File headerBam) throws PipelineJobException
public void execute(File input, File output, File headerBam, ReferenceGenome genome) throws PipelineJobException
{
List<String> params = new ArrayList<>(getBaseArgs());

Expand All @@ -328,6 +375,9 @@ public void execute(File input, File output, File headerBam) throws PipelineJobE
params.add("--HEADER");
params.add(headerBam.getPath());

params.add("-R");
params.add(genome.getWorkingFastaFile().getPath());

execute(params);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.pipeline.RecordedActionSet;
import org.labkey.api.pipeline.WorkDirectoryTask;
import org.labkey.api.sequenceanalysis.model.ReadData;
import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory;
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
Expand Down Expand Up @@ -106,7 +107,15 @@ public RecordedActionSet run() throws PipelineJobException

if (getPipelineJob().getReadset().hasArchivedData())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
if (!getPipelineJob().shouldAllowArchivedReadsets())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
}

if (getPipelineJob().getReadset().getReadData().stream().filter(ReadData::isArchived).filter(rd -> rd.getSra_accession() == null).count() > 1)
{
throw new PipelineJobException("The input readset has archived readsets that lack SRA accessions");
}
}

getHelper().cacheExpDatasForParams();
Expand Down
Loading