From 0c737221de2a8db2c4772d999ef3e1a49ddd0076 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 2 Jan 2024 14:21:48 -0800 Subject: [PATCH 01/45] Improve error message --- jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index 5176ddcdc..c5e1889a9 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -218,7 +218,7 @@ else if (numericQueryParserFields.contains(fieldName)) } else { - throw new IllegalArgumentException("No such field(s), or malformed query."); + throw new IllegalArgumentException("No such field(s), or malformed query: " + queryString + ", field: " + fieldName); } booleanQueryBuilder.add(query, BooleanClause.Occur.MUST); From 0a65541d61ec602cad376018a4e7b0d5866ce973 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 4 Jan 2024 12:40:40 -0800 Subject: [PATCH 02/45] Update Scatter/Gather logic to sort on contig size --- .../run/analysis/PbsvJointCallingHandler.java | 13 ++++++----- .../util/ScatterGatherUtils.java | 22 ++++++++++++------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java index 429405bb5..5453e65be 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java @@ -151,14 +151,17 @@ public void processFilesRemote(List inputFiles, JobContext c List outputs = new ArrayList<>(); if (getVariantPipelineJob(ctx.getJob()) != null && getVariantPipelineJob(ctx.getJob()).isScatterJob()) { - for (Interval i : 
getVariantPipelineJob(ctx.getJob()).getIntervalsForTask()) + int idx = 0; + List intervals = getVariantPipelineJob(ctx.getJob()).getIntervalsForTask(); + for (Interval i : intervals) { + idx++; if (i.getStart() != 1) { throw new PipelineJobException("Expected all intervals to start on the first base: " + i); } - File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig(), jobCompleted); + File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig(), (" (" + idx + " of " + intervals.size() + ")"), jobCompleted); if (o != null) { outputs.add(o); @@ -167,7 +170,7 @@ public void processFilesRemote(List inputFiles, JobContext c } else { - outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName, null, jobCompleted)); + outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName, null, null, jobCompleted)); } try @@ -228,11 +231,11 @@ public void processFilesRemote(List inputFiles, JobContext c } } - private File runPbsvCall(JobContext ctx, List inputs, ReferenceGenome genome, String outputBaseName, @Nullable String contig, boolean jobCompleted) throws PipelineJobException + private File runPbsvCall(JobContext ctx, List inputs, ReferenceGenome genome, String outputBaseName, @Nullable String contig, @Nullable String statusSuffix, boolean jobCompleted) throws PipelineJobException { if (contig != null) { - ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + contig); + ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + contig + (statusSuffix == null ? 
"" : statusSuffix)); } if (inputs.isEmpty()) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ScatterGatherUtils.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ScatterGatherUtils.java index 1d02a8f41..4affbe7e2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ScatterGatherUtils.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ScatterGatherUtils.java @@ -7,6 +7,8 @@ import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -114,7 +116,11 @@ private void addInterval(String refName, int start, int end) public static LinkedHashMap> divideGenome(SAMSequenceDictionary dict, int optimalBasesPerJob, boolean allowSplitChromosomes, int maxContigsPerJob) { ActiveIntervalSet ais = new ActiveIntervalSet(optimalBasesPerJob, allowSplitChromosomes, maxContigsPerJob); - for (SAMSequenceRecord rec : dict.getSequences()) + + // Sort the sequences in descending length, rather than alphabetic on name: + List sortedSeqs = new ArrayList<>(dict.getSequences()); + sortedSeqs.sort(Comparator.comparingInt(SAMSequenceRecord::getSequenceLength).reversed()); + for (SAMSequenceRecord rec : sortedSeqs) { ais.add(rec); } @@ -148,8 +154,8 @@ public void testScatter() SAMSequenceDictionary dict = getDict(); Map> ret = divideGenome(dict, 1000, true, -1); assertEquals("Incorrect number of jobs", 8, ret.size()); - assertEquals("Incorrect interval end", 2000, ret.get("Job3").get(0).getEnd()); - assertEquals("Incorrect start", 1001, ret.get("Job3").get(0).getStart()); + assertEquals("Incorrect interval end", 1000, ret.get("Job3").get(0).getEnd()); + assertEquals("Incorrect start", 1, ret.get("Job3").get(0).getStart()); assertEquals("Incorrect interval end", 4, ret.get("Job8").size()); Map> ret2 = divideGenome(dict, 3000, false, -1); @@ -183,12 +189,12 @@ public void testScatter() } Map> ret5 = 
divideGenome(dict, 750, true, -1); - assertEquals("Incorrect number of jobs", 10, ret5.size()); - assertEquals("Incorrect interval end", 1000, ret5.get("Job1").get(0).getEnd()); - assertEquals("Incorrect interval end", 4, ret5.get("Job10").size()); + assertEquals("Incorrect number of jobs", 9, ret5.size()); + assertEquals("Incorrect interval end", 750, ret5.get("Job1").get(0).getEnd()); + assertEquals("Incorrect interval end", 4, ret5.get("Job9").size()); - assertEquals("Incorrect interval start", 751, ret5.get("Job3").get(0).getStart()); - assertEquals("Incorrect interval start", 1501, ret5.get("Job8").get(0).getStart()); + assertEquals("Incorrect interval start", 1501, ret5.get("Job3").get(0).getStart()); + assertEquals("Incorrect interval start", 1, ret5.get("Job8").get(0).getStart()); Map> ret6 = divideGenome(dict, 5000, false, 2); assertEquals("Incorrect number of jobs", 5, ret6.size()); From 59df4895495bc0ec2f6cdbeaf4da24a350d00cce Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 6 Jan 2024 07:37:00 -0800 Subject: [PATCH 03/45] Add server-side action to facilitate readset file archival --- .../sequence_readsets/SRA Info.qview.xml | 2 + .../window/ArchiveReadsetsWindow.js | 69 ++++++++ .../SequenceAnalysisController.java | 154 ++++++++++++++++++ .../SequenceAnalysisModule.java | 2 + .../button/ArchiveReadsetsButton.java | 23 +++ .../views/singleCellDataManagement.html | 31 ++++ 6 files changed, 281 insertions(+) create mode 100644 SequenceAnalysis/resources/web/SequenceAnalysis/window/ArchiveReadsetsWindow.js create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/button/ArchiveReadsetsButton.java diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/SRA Info.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/SRA Info.qview.xml index dd3328383..10a6f3edf 100644 --- a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/SRA Info.qview.xml +++ 
b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/SRA Info.qview.xml @@ -17,6 +17,8 @@ + + diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/window/ArchiveReadsetsWindow.js b/SequenceAnalysis/resources/web/SequenceAnalysis/window/ArchiveReadsetsWindow.js new file mode 100644 index 000000000..ca1514d1a --- /dev/null +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/window/ArchiveReadsetsWindow.js @@ -0,0 +1,69 @@ +Ext4.define('SequenceAnalysis.window.ArchiveReadsetsWindow', { + extend: 'Ext.window.Window', + + statics: { + buttonHandler: function(dataRegionName){ + Ext4.create('SequenceAnalysis.window.ArchiveReadsetsWindow', { + dataRegionName: dataRegionName, + readsetIds: LABKEY.DataRegions[dataRegionName].getChecked() + }).show(); + } + }, + + initComponent: function() { + Ext4.apply(this, { + modal: true, + title: 'Archive Readsets', + width: 600, + bodyStyle: 'padding: 5px;', + defaults: { + border: false + }, + items: [{ + html: 'This helper will delete the actual FASTQ files associated with the selected readsets. It will error unless each readdata row has an SRA accession listed. 
You selected ' + this.readsetIds.length + ' readsets.', + style: 'padding-bottom: 10px;' + }], + buttons: [{ + text: 'Submit', + scope: this, + handler: this.onSubmit + },{ + text: 'Cancel', + handler: function(btn){ + btn.up('window').close(); + } + }] + }); + + this.callParent(arguments); + }, + + onSubmit: function(btn){ + if (!this.readsetIds.length) { + Ext4.Msg.alert('Error', 'No readsets selected!'); + return; + } + + Ext4.Msg.wait('Saving...'); + LABKEY.Ajax.request({ + url: LABKEY.ActionURL.buildURL('sequenceanalysis', 'archiveReadsets', null), + method: 'POST', + jsonData: { + readsetIds: this.readsetIds + }, + scope: this, + success: function(){ + Ext4.Msg.hide(); + this.close(); + Ext4.Msg.alert('Success', 'Readsets archived!', function(){ + if (this.dataRegionName){ + LABKEY.DataRegions[this.dataRegionName].clearSelected(); + } + + LABKEY.DataRegions[this.dataRegionName].refresh(); + }, this); + }, + failure: LABKEY.Utils.getCallbackWrapper(LDK.Utils.getErrorCallback()) + }); + } +}); \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index 6280c31d6..11d319e7f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -77,6 +77,7 @@ import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.files.FileContentService; import org.labkey.api.laboratory.NavItem; +import org.labkey.api.laboratory.security.LaboratoryAdminPermission; import org.labkey.api.module.Module; import org.labkey.api.module.ModuleHtmlView; import org.labkey.api.module.ModuleLoader; @@ -5128,4 +5129,157 @@ public void setDataFileUrl(String dataFileUrl) _dataFileUrl = dataFileUrl; } } + + @RequiresPermission(UpdatePermission.class) + public static class ArchiveReadsetsAction extends MutatingApiAction 
+ { + @Override + public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throws Exception + { + if (form.getReadsetIds() == null || form.getReadsetIds().length == 0) + { + errors.reject(ERROR_MSG, "No readset Ids provided"); + return null; + } + + TableInfo readData = QueryService.get().getUserSchema(getUser(), getContainer(), SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READ_DATA); + for (int readsetId : form.getReadsetIds()) + { + Readset rs = SequenceAnalysisService.get().getReadset(readsetId, getUser()); + Container c = ContainerManager.getForId(rs.getContainer()); + if (!getContainer().equals(c)) + { + Container toTest = c.isWorkbook() ? c.getParent() : c; + if (!getContainer().equals(toTest)) + { + errors.reject(ERROR_MSG, "Readset is not from this container: " + readsetId); + return null; + } + } + + if (!c.hasPermission(getUser(), LaboratoryAdminPermission.class)) + { + errors.reject(ERROR_MSG, "Insufficient permissions to archive readsets in the folder: " + c.getPath()); + return null; + } + + Set toDelete = new HashSet<>(); + List> toUpdate = new ArrayList<>(); + for (ReadData rd : rs.getReadData()) + { + if (rd.getSra_accession() == null) + { + errors.reject(ERROR_MSG, "Cannot mark a readdata as archived that does not have an SRA accession: " + readsetId + " / " + rd.getRowid()); + return null; + } + + toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", rd.getRowid(), "archived", true, "container", rd.getContainer()))); + + // File 1: + ExpData d1 = ExperimentService.get().getExpData(rd.getFileId1()); + if (d1 != null) + { + File file1 = d1.getFile(); + if (file1 != null && file1.exists()) + { + toDelete.add(file1); + } + + // find matching readdata: + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("fileid1/dataFileUrl"), d1.getDataFileUrl()).addCondition(FieldKey.fromString("rowid"), rd.getRowid(), CompareType.NEQ); + TableSelector ts = new TableSelector(readData, 
PageFlowUtil.set("rowid", "container"), filter, null); + if (ts.exists()) + { + ts.forEachResults(r -> { + toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", r.getInt(FieldKey.fromString("rowid")), "archived", true, "container", r.getString(FieldKey.fromString("container"))))); + }); + } + } + + if (rd.getFileId2() != null) + { + ExpData d2 = ExperimentService.get().getExpData(rd.getFileId2()); + if (d2 != null) + { + File file2 = d2.getFile(); + if (file2 != null) + { + if (file2.exists()) + { + toDelete.add(file2); + } + + // find matching readdata: + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("fileid2/dataFileUrl"), d2.getDataFileUrl()).addCondition(FieldKey.fromString("rowid"), rd.getRowid(), CompareType.NEQ); + TableSelector ts = new TableSelector(readData, PageFlowUtil.set("rowid", "container"), filter, null); + if (ts.exists()) + { + ts.forEachResults(r -> { + toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", r.getInt(FieldKey.fromString("rowid")), "archived", true, "container", r.getString(FieldKey.fromString("container"))))); + }); + } + } + } + } + } + + if (!toUpdate.isEmpty()) + { + List> keys = new ArrayList<>(); + toUpdate.forEach(row -> { + + keys.add(new CaseInsensitiveHashMap<>(Map.of("rowid", row.get("rowid")))); + }); + + try + { + readData.getUpdateService().updateRows(getUser(), getContainer(), toUpdate, keys, null, null); + } + catch (Exception e) + { + _log.error(e); + errors.reject(ERROR_MSG, "Error archiving readset: " + readsetId + ", " + e.getMessage()); + return null; + } + } + + if (!toDelete.isEmpty()) + { + for (File f : toDelete) + { + _log.info("Deleting archived file: " + f.getPath()); + f.delete(); + } + } + } + + return new ApiSimpleResponse("Success", true); + } + } + + public static class ArchiveReadsetsForm + { + private int[] _readsetIds; + private boolean _doNotRequireSra; + + public int[] getReadsetIds() + { + return _readsetIds; + } + + public void setReadsetIds(int... 
readsetIds) + { + _readsetIds = readsetIds; + } + + public boolean isDoNotRequireSra() + { + return _doNotRequireSra; + } + + public void setDoNotRequireSra(boolean doNotRequireSra) + { + _doNotRequireSra = doNotRequireSra; + } + } } \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index f392f46b5..fa2030266 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -58,6 +58,7 @@ import org.labkey.sequenceanalysis.analysis.SbtGeneCountHandler; import org.labkey.sequenceanalysis.analysis.UnmappedSequenceBasedGenotypeHandler; import org.labkey.sequenceanalysis.button.AddSraRunButton; +import org.labkey.sequenceanalysis.button.ArchiveReadsetsButton; import org.labkey.sequenceanalysis.button.ChangeReadsetStatusButton; import org.labkey.sequenceanalysis.button.ChangeReadsetStatusForAnalysesButton; import org.labkey.sequenceanalysis.button.DownloadSraButton; @@ -396,6 +397,7 @@ public void doStartupAfterSpringConfig(ModuleContext moduleContext) LDKService.get().registerQueryButton(new AddSraRunButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); LDKService.get().registerQueryButton(new RunMultiQCButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); LDKService.get().registerQueryButton(new DownloadSraButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); + LDKService.get().registerQueryButton(new ArchiveReadsetsButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); LDKService.get().registerQueryButton(new ChangeReadsetStatusForAnalysesButton(), "sequenceanalysis", "sequence_analyses"); LDKService.get().registerQueryButton(new ChangeReadsetStatusButton(), "sequenceanalysis", 
"sequence_readsets"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/button/ArchiveReadsetsButton.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/button/ArchiveReadsetsButton.java new file mode 100644 index 000000000..e19f572c3 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/button/ArchiveReadsetsButton.java @@ -0,0 +1,23 @@ +package org.labkey.sequenceanalysis.button; + +import org.labkey.api.laboratory.security.LaboratoryAdminPermission; +import org.labkey.api.ldk.table.SimpleButtonConfigFactory; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.security.permissions.AdminPermission; +import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.api.view.template.ClientDependency; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; + +import java.util.Arrays; + +/** + * Created by bimber on 7/12/2017. + */ +public class ArchiveReadsetsButton extends SimpleButtonConfigFactory +{ + public ArchiveReadsetsButton() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Archive Readsets", "SequenceAnalysis.window.ArchiveReadsetsWindow.buttonHandler(dataRegionName);", Arrays.asList(ClientDependency.supplierFromModuleName("ldk"), ClientDependency.supplierFromModuleName("laboratory"), ClientDependency.supplierFromPath("sequenceanalysis/window/ArchiveReadsetsWindow.js"))); + setPermission(LaboratoryAdminPermission.class); + } +} \ No newline at end of file diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index 64c42873c..1b977ba47 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -68,6 +68,37 @@ 'query.isArchived~eq': 0 } ) + },{ + name: 'CITE-seq/Hashing Readsets Possibly Needing Archival', + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { + schemaName: 'sequenceanalysis', + 
queryName: 'sequence_readsets', + viewName: 'SRA Info', + 'query.totalForwardReads~isnonblank': null, + 'query.application~containsoneof': 'Cell Hashing;CITE-Seq', + 'query.totalOutputs~gt': 0, + 'query.isArchived~eq': 0, + 'query.readdataWithoutSra~eq': 0, + 'query.sort': 'name' + } + ) + },{ + name: 'TCR Readsets Possibly Needing Archival', + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { + schemaName: 'sequenceanalysis', + queryName: 'sequence_readsets', + viewName: 'SRA Info', + 'query.totalForwardReads~isnonblank': null, + 'query.isArchived~eq': 0, + 'query.readdataWithoutSra~eq': 0, + 'query.totalAlignments~gt': 0, + 'query.application~contains': 'single', + 'query.status~isblank': null, + 'query.librarytype~doesnotcontain': 'BCR', + 'query.librarytype~contains': 'VDJ', + 'query.sort': 'name' + } + ) }, { name: 'Analyses Using Old Readsets', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { From 4c36b988d2eb40a12d0714f3e93dce07243af8eb Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 7 Jan 2024 14:05:54 -0800 Subject: [PATCH 04/45] - Add additional fields to VCF/lucene indexing - Update scRNA-seq archive links --- .../org/labkey/jbrowse/JBrowseFieldUtils.java | 16 ++++++++++------ .../views/singleCellDataManagement.html | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java b/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java index 0633774f2..95951cdfe 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java @@ -75,12 +75,10 @@ public static Map getIndexedFields(JsonFile json public static Map getGenotypeDependentFields(@Nullable JsonFile jsonFile) { Map ret = new HashMap<>(); ret.put(VARIABLE_SAMPLES, new JBrowseFieldDescriptor(VARIABLE_SAMPLES, "All samples with this variant", true, true, VCFHeaderLineType.Character, 7).multiValued(true).label("Samples With Variant")); - - // 
TODO: restore these once existing indexes are regenerated: - // ret.put(N_HET, new JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes")); - // ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant")); - // ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called")); - // ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes")); + ret.put(N_HET, new JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes")); + ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant")); + ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called")); + ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes")); if (jsonFile != null) { File vcf = jsonFile.getTrackFile(); @@ -99,6 +97,12 @@ public static Map getGenotypeDependentFields(@Nu } else { + ret.put(VARIABLE_SAMPLES, new JBrowseFieldDescriptor(VARIABLE_SAMPLES, "All samples with this variant", true, true, VCFHeaderLineType.Character, 7).multiValued(true).label("Samples With Variant")); + ret.put(N_HET, new 
JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes")); + ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant")); + ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called")); + ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes")); + ret.get(VARIABLE_SAMPLES).allowableValues(header.getSampleNamesInOrder()); } } diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index 1b977ba47..a91800cfb 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -91,7 +91,7 @@ 'query.totalForwardReads~isnonblank': null, 'query.isArchived~eq': 0, 'query.readdataWithoutSra~eq': 0, - 'query.totalAlignments~gt': 0, + 'query.readset/outputFileTypes~contains': '10x VLoupe', 'query.application~contains': 'single', 'query.status~isblank': null, 'query.librarytype~doesnotcontain': 'BCR', From 06d625abc0da1fc719b27d90745d4b4ec27936cc Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 7 Jan 2024 20:34:07 -0800 Subject: [PATCH 05/45] Update params on query URLs --- singlecell/resources/views/singleCellDataManagement.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index a91800cfb..bba369ea0 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ 
b/singlecell/resources/views/singleCellDataManagement.html @@ -73,7 +73,7 @@ url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', - viewName: 'SRA Info', + 'query.viewName': 'SRA Info', 'query.totalForwardReads~isnonblank': null, 'query.application~containsoneof': 'Cell Hashing;CITE-Seq', 'query.totalOutputs~gt': 0, @@ -87,11 +87,11 @@ url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', - viewName: 'SRA Info', + 'query.viewName': 'SRA Info', 'query.totalForwardReads~isnonblank': null, 'query.isArchived~eq': 0, 'query.readdataWithoutSra~eq': 0, - 'query.readset/outputFileTypes~contains': '10x VLoupe', + 'query.outputFileTypes~contains': '10x VLoupe', 'query.application~contains': 'single', 'query.status~isblank': null, 'query.librarytype~doesnotcontain': 'BCR', From 002550f177a35748dcd2bc6495754dafb37a53df Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 8 Jan 2024 12:50:40 -0800 Subject: [PATCH 06/45] Force inclusion of retainAmbiguousFeatures param --- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index bd813c840..fa71db90f 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -7,6 +7,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; import org.labkey.api.singlecell.pipeline.SingleCellStep; import java.util.Arrays; @@ -31,14 +32,13 
@@ public Provider() {{ put("allowBlank", false); }}, null), - ToolParameterDescriptor.create("retainAmbiguousFeatures", "Retain Ambiguous Features", "If checked, features hitting more than one reference will be retained", "checkbox", new JSONObject() + SeuratToolParameter.create("retainAmbiguousFeatures", "Retain Ambiguous Features", "If checked, features hitting more than one reference will be retained", "checkbox", new JSONObject() {{ put("check", false); - }}, false) + }}, false, null, true) ), Arrays.asList("sequenceanalysis/field/GenomeField.js", "/singlecell/panel/NimbleAppendPanel.js"), null); } - @Override public AppendNimble create(PipelineContext ctx) { From 98cfa801e8c4275a78e2452534967ba46757b8c6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 9 Jan 2024 13:57:54 -0600 Subject: [PATCH 07/45] Fix loading lifecycle flow when getting session (#259) Co-authored-by: Sebastian Benjamin --- .../JBrowse/VariantSearch/VariantTable.tsx | 17 +------ .../components/VariantTableWidget.tsx | 49 ++++++++----------- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/jbrowse/src/client/JBrowse/VariantSearch/VariantTable.tsx b/jbrowse/src/client/JBrowse/VariantSearch/VariantTable.tsx index b1c84e4ee..75dd70709 100644 --- a/jbrowse/src/client/JBrowse/VariantSearch/VariantTable.tsx +++ b/jbrowse/src/client/JBrowse/VariantSearch/VariantTable.tsx @@ -45,7 +45,7 @@ function VariantTable() { const [session, setSession] = useState(null) const [state, setState] = useState(null) - const [theme, setTheme] = useState(null) + const [theme, setTheme] = useState(createTheme()) const [view, setView] = useState(null) const [parsedLocString, setParsedLocString] = useState(null) const [assemblyNames, setAssemblyNames] = useState(null) @@ -86,24 +86,11 @@ function VariantTable() { fetchSession(queryParam, sessionId, nativePlugins, refTheme, setState, true, [trackId], undefined, successCallback, trackId) }, []); - // Error handle and then render the component - if (view === 
null || theme == null) { - return () - } - else if (view === "invalid" || state == "invalid") { - return (

Error fetching config. See console for more details

) - } - - if (!assemblyNames.length) { - return (

No configured assemblies

) - } - return (
- +
diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx index 9dd0d31ec..f9df7941c 100644 --- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx +++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx @@ -36,21 +36,20 @@ import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' import { lastValueFrom } from 'rxjs'; const VariantTableWidget = observer(props => { - const { assembly, trackId, parsedLocString, sessionId, session, pluginManager } = props - const { assemblyNames, assemblyManager } = session - const { view } = session - - // The code expects a proper GUID, yet the trackId is a string containing the GUID + filename - const trackGUID = truncateToValidGUID(props.trackId) - - // NOTE: since the trackId is GUID+filename, allow exact string matching, or a match on the GUID portion alone. - // Upstream code might only have access to the GUID and translating to the trackId isnt always easy - const track = view.tracks.find( - t => t.configuration.trackId === trackId || truncateToValidGUID(t.configuration.trackId).toUpperCase() === trackGUID.toUpperCase() - ) - - if (!track) { - return (

Unknown track: {trackId}

) + const { assembly, trackId, parsedLocString, sessionId, session, pluginManager } = props; + const { assemblyNames = [], assemblyManager } = session ?? {}; + const { view } = session ?? {}; + + var track = undefined; + var trackGUID = undefined; + if(view && trackId) { + // The code expects a proper GUID, yet the trackId is a string containing the GUID + filename + // NOTE: since the trackId is GUID+filename, allow exact string matching, or a match on the GUID portion alone. + // Upstream code might only have access to the GUID and translating to the trackId isnt always easy + trackGUID = truncateToValidGUID(props.trackId) + track = view.tracks.find( + t => t.configuration.trackId === trackId || truncateToValidGUID(t.configuration.trackId).toUpperCase() === trackGUID.toUpperCase() + ) } function handleSearch(data) { @@ -150,7 +149,6 @@ const VariantTableWidget = observer(props => { {displayValue} {renderPopover && - // TODO { const [activeWidgetList, setActiveWidgetList] = useState([]) // False until initial data load or an error: - const [dataLoaded, setDataLoaded] = useState(!parsedLocString) + const [dataLoaded, setDataLoaded] = useState(false) const urlParams = new URLSearchParams(window.location.search); const page = parseInt(urlParams.get('page') || '0'); @@ -278,20 +276,15 @@ const VariantTableWidget = observer(props => { }) } - fetch() + if(sessionId && trackGUID) { + fetch() + } + return () => { window.removeEventListener('popstate', handlePopState); }; - }, [pluginManager, parsedLocString, session.visibleWidget]) - - if (!view) { - return - } - - if (!track) { - return(

Unable to find track: {trackId}

) - } + }, [pluginManager, parsedLocString, session?.visibleWidget, sessionId, trackGUID]) if (error) { throw new Error(error) @@ -435,7 +428,7 @@ const VariantTableWidget = observer(props => { { - [...session.activeWidgets].map((elem) => { + [...(session?.activeWidgets ?? [])].map((elem) => { const widget = elem[1] const widgetType = pluginManager.getWidgetType(widget.type) const { ReactComponent } = widgetType From fa471aa6b18cf2fd564bb77f356e02c392afb292 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 9 Jan 2024 17:10:53 -0800 Subject: [PATCH 08/45] Support maxLibrarySizeRatio for nimble --- singlecell/resources/chunks/AppendNimble.R | 2 +- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 23874d8d1..57fbb6f6a 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures) + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) } saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index fa71db90f..111c2be2d 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -35,7 +35,11 @@ public Provider() SeuratToolParameter.create("retainAmbiguousFeatures", "Retain Ambiguous Features", "If checked, features hitting more than one reference will be retained", "checkbox", new JSONObject() {{ put("check", false); - }}, false, null, true) + }}, false, null, true), + SeuratToolParameter.create("maxLibrarySizeRatio", "Max Library Size Ratio", "This normalization relies on the assumption that the library size of the assay being normalized in negligible relative to the assayForLibrarySize. To verify this holds true, the method will error if librarySize(assayToNormalize)/librarySize(assayForLibrarySize) exceeds this value", "ldk-numberfield", new JSONObject() + {{ + put("decimalPrecision", 4); + }}, 0.1, null, true) ), Arrays.asList("sequenceanalysis/field/GenomeField.js", "/singlecell/panel/NimbleAppendPanel.js"), null); } From 42d8c75da79b493bb32ddadca18e8a67d08ce1d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 19:54:23 -0800 Subject: [PATCH 09/45] Bump follow-redirects from 1.15.3 to 1.15.4 in /jbrowse (#260) Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.15.3 to 1.15.4. - [Release notes](https://github.com/follow-redirects/follow-redirects/releases) - [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.15.3...v1.15.4) --- updated-dependencies: - dependency-name: follow-redirects dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jbrowse/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json index 1b9cf0ee5..625de5a5e 100644 --- a/jbrowse/package-lock.json +++ b/jbrowse/package-lock.json @@ -9042,9 +9042,9 @@ } }, "node_modules/follow-redirects": { - "version": "1.15.3", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", - "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "version": "1.15.4", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.4.tgz", + "integrity": "sha512-Cr4D/5wlrb0z9dgERpUL3LrmPKVDsETIJhaCMeDfuFYcqa5bldGV6wBsAN6X/vxlXQtFBMrXdXxdL8CbDTGniw==", "dev": true, "funding": [ { From f97bddb24dc1547cd613dae92bcaa3cfe5f91eae Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 13 Jan 2024 08:17:41 -0800 Subject: [PATCH 10/45] Update defaults for TCR and update repseqio dependencies --- singlecell/resources/chunks/SeuratPrototype.R | 2 +- .../org/labkey/singlecell/pipeline/singlecell/AppendTcr.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/SeuratPrototype.R b/singlecell/resources/chunks/SeuratPrototype.R index 94ad664fc..0e5a11594 100644 --- a/singlecell/resources/chunks/SeuratPrototype.R +++ b/singlecell/resources/chunks/SeuratPrototype.R @@ -71,7 +71,7 @@ for (datasetId in names(seuratObjects)) { if ('Saturation.RNA' %in% names(seuratObj@meta.data)) { meanSaturation.RNA <- mean(seuratObj$Saturation.RNA) if (!is.null(minSaturation) && meanSaturation.RNA < minSaturation) { - addErrorMessage(paste0('Mean RNA saturation was: ', meanSaturation.RNA, ' for dataset: ', datasetId, ', below threshold of: ', minSaturation)) + addErrorMessage(paste0('Mean RNA saturation was: ', meanSaturation.RNA, ' 
for dataset: ', datasetId, ', below threshold of: ', minSaturation, ', total cells: ', ncol(seuratObj))) } metricData <- rbind(metricData, data.frame(dataId = datasetId, readsetId = datasetIdToReadset[[datasetId]], metricname = 'MeanSaturation.RNA', metricvalue = meanSaturation.RNA)) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendTcr.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendTcr.java index 728cbd204..0176d2766 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendTcr.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendTcr.java @@ -23,8 +23,8 @@ public Provider() super("AppendTcr", "Append TCR Data", "RDiscvr", "This uses Rdiscvr::DownloadAndAppendTcrClonotypes to append TCR data.", List.of( SeuratToolParameter.create("allowMissing", "Allow Missing Data", "If checked, an error will be thrown if any sample lacks TCR data", "checkbox", new JSONObject() {{ - put("checked", true); - }}, true) + put("checked", false); + }}, false) ), null, null); } From d7df3ca6d2a28af829b3925711922bef4126724d Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 16 Jan 2024 13:10:53 -0800 Subject: [PATCH 11/45] Allow alternate action name for MergeVcfsAndGenotypesHandler --- .../SequenceOutputHandlerRemoteTask.java | 2 +- .../variant/MergeVcfsAndGenotypesHandler.java | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerRemoteTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerRemoteTask.java index c7250b5f1..06cf83837 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerRemoteTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerRemoteTask.java @@ -57,7 +57,7 @@ public String getStatusName() public List getProtocolActionNames() { List allowableNames = new 
ArrayList<>(); - for (SequenceOutputHandler handler : SequenceAnalysisServiceImpl.get().getFileHandlers(SequenceOutputHandler.TYPE.OutputFile)) + for (SequenceOutputHandler handler : SequenceAnalysisServiceImpl.get().getFileHandlers(SequenceOutputHandler.TYPE.OutputFile)) { allowableNames.add(handler.getName()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java index e3609dd9e..4b4957590 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java @@ -11,13 +11,14 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.PageFlowUtil; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.run.util.MergeVcfsAndGenotypesWrapper; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -25,7 +26,7 @@ /** * Created by bimber on 4/4/2017. 
*/ -public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHandler +public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHandler implements SequenceOutputHandler.HasActionNames { public MergeVcfsAndGenotypesHandler() { @@ -58,7 +59,14 @@ public SequenceOutputProcessor getProcessor() return new Processor(); } - public static class Processor implements SequenceOutputProcessor + @Override + public Collection getAllowableActionNames() + { + // NOTE: Combine Variants only exists for legacy purposes: + return PageFlowUtil.set(getName(), "Combine Variants"); + } + + public class Processor implements SequenceOutputProcessor { @Override public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException @@ -71,7 +79,7 @@ public void processFilesRemote(List inputFiles, JobContext c { File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz"); - RecordedAction action = new RecordedAction("Combine Variants"); + RecordedAction action = new RecordedAction(getName()); Set genomeIds = new HashSet<>(); inputFiles.forEach(x -> genomeIds.add(x.getLibrary_id())); From 5cdc9411b46b4f2620abbc8fec2651a2f6ab51ae Mon Sep 17 00:00:00 2001 From: hextraza Date: Wed, 17 Jan 2024 13:02:41 -0800 Subject: [PATCH 12/45] Add column URL serialization (#261) * Serialize column visibility model to the URL * URL serialization tests + don't serialize full object --------- Co-authored-by: Sebastian Benjamin --- .../components/VariantTableWidget.tsx | 33 ++++++-- .../external/labModules/JBrowseTest.java | 78 +++++++++++++++++++ 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx index 
f9df7941c..afe99496c 100644 --- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx +++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx @@ -226,8 +226,6 @@ const VariantTableWidget = observer(props => { const [fieldTypeInfo, setFieldTypeInfo] = useState([]); const [allowedGroupNames, setAllowedGroupNames] = useState([]); const [promotedFilters, setPromotedFilters] = useState>(null); - const [columnVisibilityModel, setColumnVisibilityModel] = useState({}); - const [adapter, setAdapter] = useState(null) // Active widget ID list to force rerender when a JBrowseUIButton is clicked @@ -241,6 +239,10 @@ const VariantTableWidget = observer(props => { const pageSize = parseInt(urlParams.get('pageSize') || '50'); const [pageSizeModel, setPageSizeModel] = React.useState({ page, pageSize }); + const colVisURLComponent = urlParams.get("colVisModel") || "{}" + const colVisModel = JSON.parse(decodeURIComponent(colVisURLComponent)) + const [columnVisibilityModel, setColumnVisibilityModel] = useState(colVisModel); + // API call to retrieve the requested features. 
useEffect(() => { const handlePopState = () => { @@ -261,9 +263,19 @@ const VariantTableWidget = observer(props => { setColumns(columns) - const columnVisibilityModel = {} - fields.filter((x) => !x.isHidden).forEach((x) => columnVisibilityModel[x.name] = !!x.isInDefaultColumns) - setColumnVisibilityModel(columnVisibilityModel) + if(JSON.stringify(columnVisibilityModel) === '{}') { + const defaultModel = {}; + fields.filter((x) => !x.isHidden).forEach((x) => { + defaultModel[x.name] = !!x.isInDefaultColumns; + }); + setColumnVisibilityModel(defaultModel); + } else { + const updatedModel = fields.reduce((acc, field) => { + acc[field.name] = columnVisibilityModel[field.name] === true; + return acc; + }, {}); + setColumnVisibilityModel(updatedModel); + } setFieldTypeInfo(fields) setAllowedGroupNames(groups) @@ -395,6 +407,17 @@ const VariantTableWidget = observer(props => { }} onColumnVisibilityModelChange={(model) => { setColumnVisibilityModel(model) + + const trueValuesModel = Object.keys(model).reduce((acc, key) => { + if (model[key] === true) { + acc[key] = true; + } + return acc; + }, {}); + + const currentUrl = new URL(window.location.href); + currentUrl.searchParams.set("colVisModel", encodeURIComponent(JSON.stringify(trueValuesModel))); + window.history.pushState(null, "", currentUrl.toString()); }} /> ) diff --git a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java index 4b8704d2b..57006b138 100644 --- a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java +++ b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java @@ -1813,5 +1813,83 @@ private void testLuceneSearchUI(String sessionId) waitForElement(Locator.tagWithText("span", "0.029")); clearFilterDialog("IMPACT equals HIGH,MODERATE"); + + testLuceneColumnSerialization(sessionId); + } + + private void testLuceneColumnSerializationFirstRow() { + WebElement 
locator = TOP_ROW.findElement(getDriver()); + + for (WebElement elem : locator.findElements(By.xpath("./child::*"))) { + String value = elem.getText(); + if (StringUtils.trimToNull(value) == null) + { + value = ""; + } + + if (StringUtils.isEmpty(elem.getText())) { + return; + } + + switch(elem.getAttribute("aria-colindex")) + { + case "1": + Assert.assertEquals(value, "1"); + break; + case "2": + Assert.assertEquals(value, "2"); + break; + case "3": + Assert.assertEquals(value, "A"); + break; + case "4": + Assert.assertEquals(value, "T"); + break; + case "6": + Assert.assertEquals(value, "0.029"); + break; + case "7": + Assert.assertEquals(value, "7.292"); + break; + case "8": + Assert.assertEquals(value, "HIGH"); + break; + } + } + } + + private void testLuceneColumnSerialization(String sessionId) { + beginAt("/" + getProjectName() + "/jbrowse-jbrowse.view?session=" + sessionId); + waitAndClick(Locator.tagContainingText("button", "Show all regions in assembly").withClass("MuiButtonBase-root")); + waitAndClick(Locator.tagWithText("p", "No tracks active.")); + waitAndClick(Locator.tagWithText("button", "Open track selector")); + + Locator l = Locator.tagWithText("span", "TestVCF").withClass("MuiFormControlLabel-label"); + waitAndClick(l); + getDriver().findElement(Locator.tag("body")).sendKeys(Keys.ESCAPE); //close modal + + openTrackMenuItem("Variant Search", true); + waitAndClick(Locator.tagWithAttribute("button", "aria-label", "Select columns")); + + Locator caddScoreToggle = Locator.tagWithAttribute("input", "name", "CADD_PH"); + waitForElement(caddScoreToggle); + WebElement parentOfCaddScoreToggle = caddScoreToggle.findElement(getDriver()).findElement(By.xpath("./..")); + parentOfCaddScoreToggle.click(); + + String colVisModelString = 
"%257B%2522contig%2522%253Atrue%252C%2522start%2522%253Atrue%252C%2522ref%2522%253Atrue%252C%2522alt%2522%253Atrue%252C%2522variableSamples%2522%253Atrue%252C%2522AF%2522%253Atrue%252C%2522CADD_PH%2522%253Atrue%252C%2522IMPACT%2522%253Atrue%257D"; + Assert.assertEquals(getUrlParam("colVisModel"), colVisModelString); + + getDriver().navigate().refresh(); + + waitForElement(TOP_ROW); + Assert.assertEquals(getUrlParam("colVisModel"), colVisModelString); + testLuceneColumnSerializationFirstRow(); + + waitAndClick(Locator.tagWithText("button", "Search")); + waitAndClick(Locator.tagWithClass("button", "filter-form-select-button")); + + waitForElement(TOP_ROW); + Assert.assertEquals(getUrlParam("colVisModel"), colVisModelString); + testLuceneColumnSerializationFirstRow(); } } \ No newline at end of file From b2dc95512277bc22f1851f569d5a28bd292224d5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 18 Jan 2024 21:14:28 -0800 Subject: [PATCH 13/45] Add support for DeepVariant --- .../SequenceAnalysisModule.java | 21 +- .../run/analysis/DeepVariantAnalysis.java | 258 ++++++++++++++++++ .../run/analysis/HaplotypeCallerAnalysis.java | 4 +- 3 files changed, 262 insertions(+), 21 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index fa2030266..1d095b04a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -78,25 +78,7 @@ import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; -import org.labkey.sequenceanalysis.run.analysis.BamIterator; -import 
org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep; -import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis; -import org.labkey.sequenceanalysis.run.analysis.GenrichStep; -import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis; -import org.labkey.sequenceanalysis.run.analysis.ImmunoGenotypingAnalysis; -import org.labkey.sequenceanalysis.run.analysis.LofreqAnalysis; -import org.labkey.sequenceanalysis.run.analysis.MergeLoFreqVcfHandler; -import org.labkey.sequenceanalysis.run.analysis.NextCladeHandler; -import org.labkey.sequenceanalysis.run.analysis.PARalyzerAnalysis; -import org.labkey.sequenceanalysis.run.analysis.PangolinHandler; -import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis; -import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler; -import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis; -import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis; -import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis; -import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis; -import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler; -import org.labkey.sequenceanalysis.run.analysis.ViralAnalysis; +import org.labkey.sequenceanalysis.run.analysis.*; import org.labkey.sequenceanalysis.run.assembly.TrinityRunner; import org.labkey.sequenceanalysis.run.bampostprocessing.AddOrReplaceReadGroupsStep; import org.labkey.sequenceanalysis.run.bampostprocessing.BaseQualityScoreRecalibrator; @@ -281,6 +263,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new ImmunoGenotypingAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new ViralAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new HaplotypeCallerAnalysis.Provider()); + SequencePipelineService.get().registerPipelineStep(new DeepVariantAnalysis.Provider()); 
SequencePipelineService.get().registerPipelineStep(new SnpCountAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new ExportOverlappingReadsAnalysis.Provider()); SequencePipelineService.get().registerPipelineStep(new SubreadAnalysis.Provider()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java new file mode 100644 index 000000000..4001158cb --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -0,0 +1,258 @@ +package org.labkey.sequenceanalysis.run.analysis; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.model.AnalysisModel; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep; +import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.FileUtil; +import 
org.labkey.api.writer.PrintWriters; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * User: bimber + * Date: 7/3/2014 + * Time: 11:29 AM + */ +public class DeepVariantAnalysis extends AbstractCommandPipelineStep implements AnalysisStep +{ + public DeepVariantAnalysis(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new DeepVariantAnalysis.DeepVariantWrapper(ctx.getLogger())); + } + + public static class Provider extends AbstractAnalysisStepProvider + { + public Provider() + { + super("DeepVariantAnalysis", "DeepVariant", "DeepVariant", "This will run DeepVariant on the selected data to generate a gVCF.", getToolDescriptors(), null, null); + } + + @Override + public DeepVariantAnalysis create(PipelineContext ctx) + { + return new DeepVariantAnalysis(this, ctx); + } + } + + public static List getToolDescriptors() + { + return Arrays.asList( + ToolParameterDescriptor.create("modelType", "Model Type", "", "ldk-simplecombo", new JSONObject(){{ + put("storeValues", "AUTO;WGS;WES;PACBIO;ONT_R104;HYBRID_PACBIO_ILLUMINA"); + put("multiSelect", false); + put("allowBlank", false); + }}, "AUTO"), + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--haploid_contigs"), "haploidContigs", "Haploid Contigs", "", "textfield", new JSONObject(){{ + + }}, "X,Y") + ); + } + + @Override + public void init(SequenceAnalysisJobSupport support) throws PipelineJobException + { + // TODO: handle auto-detection + String modelType = getProvider().getParameterByName("modelType").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + if (modelType == null) + { + throw new PipelineJobException("Missing model type"); + } + + if ("AUTO".equals(modelType)) + { + getPipelineCtx().getLogger().info("Inferring model type by readset type:"); + if (support.getCachedReadsets().size() != 1) + { + 
throw new PipelineJobException("Expected a single cached readset, found: " + support.getCachedReadsets().size()); + } + + Readset rs = support.getCachedReadsets().get(0); + if ("ILLUMINA".equals(rs.getPlatform())) + { + switch (rs.getApplication()) + { + case "Whole Genome: Deep Coverage": + modelType = "WGS"; + break; + case "Whole Genome: Light Coverage": + modelType = "WGS"; + break; + case "Whole Exome": + modelType = "WXS"; + break; + default: + throw new IllegalArgumentException("Unknown application: " + rs.getApplication()); + } + } + else if ("PACBIO".equals(rs.getPlatform())) + { + modelType = "PACBIO"; + } + + if ("AUTO".equals(modelType)) + { + throw new PipelineJobException("Unable to infer modelType for: " + rs.getName()); + } + + support.cacheObject("modelType", modelType); + } + } + + @Override + public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException + { + AnalysisOutputImpl output = new AnalysisOutputImpl(); + output.addInput(inputBam, "Input BAM File"); + + File outputFile = new File(outputDir, FileUtil.getBaseName(inputBam) + ".g.vcf.gz"); + File idxFile = new File(outputDir, FileUtil.getBaseName(inputBam) + ".g.vcf.gz.idx"); + + String inferredModelType = getPipelineCtx().getSequenceSupport().getCachedObject("modelType", String.class); + String modelType = inferredModelType == null ? 
getProvider().getParameterByName("modelType").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class) : inferredModelType; + if (modelType == null) + { + throw new PipelineJobException("Missing model type"); + } + + getWrapper().setOutputDir(outputDir); + getWrapper().setWorkingDir(outputDir); + getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, output, modelType, getClientCommandArgs()); + + output.addOutput(outputFile, "gVCF File"); + output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + if (idxFile.exists()) + { + output.addOutput(idxFile, "VCF Index"); + } + + return output; + } + + @Override + public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException + { + return null; + } + + public static class DeepVariantWrapper extends AbstractCommandWrapper + { + public DeepVariantWrapper(Logger logger) + { + super(logger); + } + + private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + { + try + { + if (workingDirectory.equals(input.getParentFile())) + { + return input; + } + + File local = new File(workingDirectory, input.getName()); + if (!local.exists()) + { + getLogger().debug("Copying file locally: " + input.getPath()); + FileUtils.copyFile(input, local); + } + + output.addIntermediateFile(local); + + return local; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutputTracker tracker, String modelType, List extraArgs) throws PipelineJobException + { + File workDir = outputGvcf.getParentFile(); + + File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker); + ensureLocalCopy(new File(inputBam.getPath() + ".bai"), workDir, tracker); + + File refFastaLocal = 
ensureLocalCopy(refFasta, workDir, tracker); + ensureLocalCopy(new File(refFastaLocal.getPath() + ".fai"), workDir, tracker); + ensureLocalCopy(new File(FileUtil.getBaseName(refFasta.getPath()) + ".dict"), workDir, tracker); + + File localBashScript = new File(workDir, "docker.sh"); + File dockerBashScript = new File(workDir, "dockerRun.sh"); + tracker.addIntermediateFile(localBashScript); + tracker.addIntermediateFile(dockerBashScript); + + String binVersion = ""; + List bashArgs = new ArrayList<>(Arrays.asList("/opt/deepvariant/bin/run_deepvariant")); + bashArgs.add("--ref=/work/" + refFastaLocal.getName()); + bashArgs.add("--reads=/work/" + inputBamLocal.getName()); + bashArgs.add("--output_gvcf=/work/" + outputGvcf.getName()); + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (maxThreads != null) + { + bashArgs.add("--num_shards=" + maxThreads); + } + + if (extraArgs != null) + { + bashArgs.addAll(extraArgs); + } + + try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) + { + writer.println("#!/bin/bash"); + writer.println("set -x"); + writer.println("WD=`pwd`"); + writer.println("HOME=`echo ~/`"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + writer.println("sudo $DOCKER pull google/deepvariant:" + binVersion); + writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("\t-v \"${WD}:/work\" \\"); + writer.println("\t-v \"${HOME}:/homeDir\" \\"); + writer.println("\t-u $UID \\"); + writer.println("\t-e TMPDIR=/work/tmpDir \\"); + writer.println("\t-e USERID=$UID \\"); + writer.println("\t-w /work \\"); + writer.println("\tgoogle/deepvariant:" + binVersion + " \\"); + writer.println("\t/work/" + dockerBashScript.getName()); + writer.println("EXIT_CODE=$?"); + writer.println("echo 'Docker run exit code: '$EXIT_CODE"); + writer.println("exit $EXIT_CODE"); + + 
dockerWriter.println("#!/bin/bash"); + dockerWriter.println("set -x"); + dockerWriter.println(StringUtils.join(bashArgs, " ")); + dockerWriter.println("EXIT_CODE=$?"); + dockerWriter.println("echo 'Exit code: '$?"); + dockerWriter.println("exit $EXIT_CODE"); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/HaplotypeCallerAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/HaplotypeCallerAnalysis.java index 7c3b036ca..fb2eb656a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/HaplotypeCallerAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/HaplotypeCallerAnalysis.java @@ -28,7 +28,7 @@ */ public class HaplotypeCallerAnalysis extends AbstractCommandPipelineStep implements AnalysisStep { - public HaplotypeCallerAnalysis(PipelineStepProvider provider, PipelineContext ctx) + public HaplotypeCallerAnalysis(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new HaplotypeCallerWrapper(ctx.getLogger())); } @@ -51,7 +51,7 @@ public static List getToolDescriptors() { return Arrays.asList( ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--dont-use-soft-clipped-bases"), "dontUseSoftClippedBases", "Don't Use Soft Clipped Bases", "If specified, we will not analyze soft clipped bases in the reads", "checkbox", null, false), - ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("max-alternate-alleles"), "maxAlternateAlleles", "Max Alternate Alleles", "Passed to --max-alternate-alleles", "ldk-integerfield", new JSONObject(){{ + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--max-alternate-alleles"), "maxAlternateAlleles", "Max Alternate Alleles", "Passed to --max-alternate-alleles", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); }}, 6) ); From 
ae8e9e4e0173af9ec201c13e0b188878f0e6894e Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jan 2024 05:37:06 -0800 Subject: [PATCH 14/45] Allow archived readsets for cell hashing --- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 4657ddf8f..5ebfc860b 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -294,7 +294,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo } } - support.cacheReadset(hashingReadsetId, job.getUser()); + support.cacheReadset(hashingReadsetId, job.getUser(), true); }); From 078b7ee5b20a72b06cc4bfc174a9772b9ab790e0 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jan 2024 13:17:07 -0800 Subject: [PATCH 15/45] Add missing arguments to DeepVariant --- .../run/analysis/DeepVariantAnalysis.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 4001158cb..1e38284fd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -66,14 +66,16 @@ public static List getToolDescriptors() }}, "AUTO"), ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--haploid_contigs"), "haploidContigs", "Haploid Contigs", "", "textfield", new JSONObject(){{ - }}, "X,Y") + }}, "X,Y"), + ToolParameterDescriptor.create("binVersion", "DeepVariant Version", "The version of DeepVariant to run, which is passed to their docker container", "textfield", new 
JSONObject(){{ + put("allowBlank", false); + }}, "1.6.0") ); } @Override public void init(SequenceAnalysisJobSupport support) throws PipelineJobException { - // TODO: handle auto-detection String modelType = getProvider().getParameterByName("modelType").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); if (modelType == null) { @@ -136,12 +138,21 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc throw new PipelineJobException("Missing model type"); } + List args = new ArrayList<>(getClientCommandArgs()); + args.add("--model_type=" + modelType); + + String binVersion = getProvider().getParameterByName("binVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + if (binVersion == null) + { + throw new PipelineJobException("Missing binVersion"); + } + getWrapper().setOutputDir(outputDir); getWrapper().setWorkingDir(outputDir); - getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, output, modelType, getClientCommandArgs()); + getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, output, binVersion, args); output.addOutput(outputFile, "gVCF File"); - output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), "DeepVariant Version: " + binVersion); if (idxFile.exists()) { output.addOutput(idxFile, "VCF Index"); @@ -189,7 +200,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTr } } - public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutputTracker tracker, String modelType, List extraArgs) throws PipelineJobException + public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutputTracker tracker, String 
binVersion, List extraArgs) throws PipelineJobException { File workDir = outputGvcf.getParentFile(); @@ -205,7 +216,6 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu tracker.addIntermediateFile(localBashScript); tracker.addIntermediateFile(dockerBashScript); - String binVersion = ""; List bashArgs = new ArrayList<>(Arrays.asList("/opt/deepvariant/bin/run_deepvariant")); bashArgs.add("--ref=/work/" + refFastaLocal.getName()); bashArgs.add("--reads=/work/" + inputBamLocal.getName()); From 089ccec05293c3cfe78db8ab5752d2d7254304a6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jan 2024 15:45:40 -0800 Subject: [PATCH 16/45] Second instance of allowing hashing and archived readsets --- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 5ebfc860b..1df529330 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -356,7 +356,7 @@ else if (distinctHTOs.size() == 1) } } - support.cacheReadset(citeseqReadsetId, job.getUser()); + support.cacheReadset(citeseqReadsetId, job.getUser(), true); }); citeToRemove.forEach(readsetToCiteSeqMap::remove); From cd85ff25014059fd4bb71eb5af02e8b294df1a00 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jan 2024 15:56:19 -0800 Subject: [PATCH 17/45] Allow DeepVariant to run on CRAMs --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 1e38284fd..2d0a831f5 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -22,6 +22,7 @@ import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; +import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.io.IOException; @@ -205,7 +206,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu File workDir = outputGvcf.getParentFile(); File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker); - ensureLocalCopy(new File(inputBam.getPath() + ".bai"), workDir, tracker); + ensureLocalCopy(SequenceUtil.getExpectedIndex(inputBam), workDir, tracker); File refFastaLocal = ensureLocalCopy(refFasta, workDir, tracker); ensureLocalCopy(new File(refFastaLocal.getPath() + ".fai"), workDir, tracker); From 8ea56254cd02a4e77b9e2e64ac103ee8208e7bfd Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Jan 2024 20:54:51 -0600 Subject: [PATCH 18/45] Add new pipeline option to perform more frequent deletion of intermediate files (#262) --- .../pipeline/TaskFileManager.java | 2 + .../SequenceAnalysisController.java | 9 +++ .../pipeline/ProcessVariantsHandler.java | 16 ++++++ .../pipeline/SequenceAlignmentTask.java | 56 ++++++++++++++++++- .../pipeline/TaskFileManagerImpl.java | 22 ++++++-- 5 files changed, 100 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/TaskFileManager.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/TaskFileManager.java index 6263c7994..beecdcb51 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/TaskFileManager.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/TaskFileManager.java @@ -56,6 +56,8 @@ public interface TaskFileManager extends PipelineOutputTracker boolean 
isDeleteIntermediateFiles(); + public boolean performCleanupAfterEachStep(); + boolean isCopyInputsLocally(); void addPicardMetricsFiles(List files) throws PipelineJobException; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index 11d319e7f..096f16ff9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -3298,6 +3298,15 @@ public ApiResponse execute(CheckFileStatusForm form, BindException errors) toolArr.put(intermediateFiles); + JSONObject performCleanupAfterEachStep = new JSONObject(); + performCleanupAfterEachStep.put("name", "performCleanupAfterEachStep"); + performCleanupAfterEachStep.put("defaultValue", true); + performCleanupAfterEachStep.put("label", "Perform Cleanup After Each Step"); + performCleanupAfterEachStep.put("description", "Is selected, intermediate files from this job will be deleted after each step, instead of once at the end of the job. This can reduce the working directory size. 
Note: this will only apply if deleteIntermediateFiles is selected, and this is not supported across every possible pipeline type."); + performCleanupAfterEachStep.put("fieldXtype", "checkbox"); + + toolArr.put(performCleanupAfterEachStep); + ret.put("toolParameters", toolArr); ret.put("description", handler.getDescription()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java index d1efdc9a2..ac4624b6d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java @@ -483,6 +483,12 @@ public static File processVCF(File input, Integer libraryId, JobContext ctx, Res action.setEndTime(end); ctx.getJob().getLogger().info(stepCtx.getProvider().getLabel() + " Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true)); + if (ctx.getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(currentVCF, new File(currentVCF.getPath() + ".tbi")); + getTaskFileManagerImpl(ctx).deleteIntermediateFiles(toRetain); + } + resumer.setStepComplete(stepIdx, input.getPath(), action, currentVCF); } @@ -886,4 +892,14 @@ public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, TaskFil } } } + + private static TaskFileManagerImpl getTaskFileManagerImpl(JobContext ctx) throws PipelineJobException + { + if (!(ctx.getFileManager() instanceof TaskFileManagerImpl tfm)) + { + throw new PipelineJobException("Expected fileManager to be a TaskFileManagerImpl"); + } + + return tfm; + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index aba4524bf..f0b4c4aae 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -96,6 +96,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -347,11 +348,27 @@ private Map> performFastqPreprocessing(SequenceReadse toAlign.put(d, pair); } + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = toAlign.values().stream().map(x -> Arrays.asList(x.first, x.second)).flatMap(List::stream).filter(Objects::nonNull).toList(); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } + _resumer.setFastqPreprocessingDone(toAlign, preprocessingActions, copiedInputs); return toAlign; } + private TaskFileManagerImpl getTaskFileManagerImpl() throws PipelineJobException + { + if (!(getHelper().getFileManager() instanceof TaskFileManagerImpl tfm)) + { + throw new PipelineJobException("Expected fileManager to be a TaskFileManagerImpl"); + } + + return tfm; + } + private SequenceAlignmentJob getPipelineJob() { return (SequenceAlignmentJob)getJob(); @@ -667,6 +684,12 @@ private void alignSet(Readset rs, String basename, Map alignActions = new ArrayList<>(); bam = doAlignment(referenceGenome, rs, files, alignActions); + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } + _resumer.setInitialAlignmentDone(bam, alignActions); } @@ -742,6 +765,12 @@ else if (step.expectToCreateNewBam()) action.setEndTime(end); getJob().getLogger().info(stepCtx.getProvider().getLabel() + " Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true)); postProcessActions.add(action); + + if 
(getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } } } @@ -791,6 +820,12 @@ else if (step.expectToCreateNewBam()) } } + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } + _resumer.setBamSortDone(bam, sortAction); } @@ -841,6 +876,12 @@ else if (step.expectToCreateNewBam()) renameAction.setEndTime(end); getJob().getLogger().info("Rename Bam Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true)); + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(renamedBam, SequenceUtil.getExpectedIndex(renamedBam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } + _resumer.setBamRenameDone(renamedBam, List.of(renameAction)); } @@ -888,6 +929,12 @@ else if (step.expectToCreateNewBam()) indexAction.setEndTime(end); getJob().getLogger().info("IndexBam Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true)); + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(renamedBam, SequenceUtil.getExpectedIndex(renamedBam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } + _resumer.setIndexBamDone(true, indexAction); } } @@ -1045,8 +1092,15 @@ else if (step.expectToCreateNewBam()) } analysisActions.add(action); - _resumer.setBamAnalysisComplete(analysisActions); + + if (getHelper().getFileManager().performCleanupAfterEachStep()) + { + List toRetain = Arrays.asList(renamedBam, SequenceUtil.getExpectedIndex(renamedBam)); + getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); + } } + + _resumer.setBamAnalysisComplete(analysisActions); } } diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java index 670534465..622c2ed74 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java @@ -7,6 +7,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DurationFormatUtils; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Table; import org.labkey.api.data.TableInfo; @@ -707,6 +708,12 @@ public boolean isDeleteIntermediateFiles() return "true".equals(_job.getParameters().get("deleteIntermediateFiles")); } + @Override + public boolean performCleanupAfterEachStep() + { + return "true".equals(_job.getParameters().get("performCleanupAfterEachStep")); + } + @Override public boolean isCopyInputsLocally() { @@ -726,19 +733,26 @@ private Set getInputPaths() @Override public void deleteIntermediateFiles() throws PipelineJobException { - _job.getLogger().info("Cleaning up intermediate files"); + deleteIntermediateFiles(Collections.emptySet()); + } - Set inputs = new HashSet<>(); - inputs.addAll(getSupport().getInputFiles()); + public void deleteIntermediateFiles(@NotNull Collection filesToRetain) throws PipelineJobException + { + _job.getLogger().info("Cleaning up intermediate files"); Set inputPaths = getInputPaths(); - if (isDeleteIntermediateFiles()) { _job.getLogger().debug("Intermediate files will be removed, total: " + _intermediateFiles.size()); for (File f : _intermediateFiles) { + if (filesToRetain.contains(f)) + { + _job.getLogger().debug("\tFile marked for deletion, but was part of filesToRetain and will not be deleted: " + f.getPath()); + continue; + } + _job.getLogger().debug("\tDeleting intermediate file: " + f.getPath()); if 
(inputPaths.contains(f.getPath())) From 3d7f83817890023eae55ad86138f65d836d7c7de Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Jan 2024 07:51:04 -0800 Subject: [PATCH 19/45] Bugfix to DeepVariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 2d0a831f5..c658090bb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -264,6 +264,13 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu { throw new PipelineJobException(e); } + + execute(Arrays.asList("/bin/bash/", localBashScript.getPath())); + + if (!outputGvcf.exists()) + { + throw new PipelineJobException("File not found: " + outputGvcf.getPath()); + } } } } From a62258df0423993477b9f88a92a0fa845d3d4be8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Jan 2024 11:15:30 -0800 Subject: [PATCH 20/45] Bugfix to DeepVariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index c658090bb..cfae02936 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -265,7 +265,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu throw new PipelineJobException(e); } - execute(Arrays.asList("/bin/bash/", localBashScript.getPath())); + 
execute(Arrays.asList("/bin/bash", localBashScript.getPath())); if (!outputGvcf.exists()) { From 207b83eee9502f15a34452687d4e84082c54e498 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 21 Jan 2024 16:19:11 -0800 Subject: [PATCH 21/45] Bugfix to DeepVariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index cfae02936..148c65013 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -246,6 +246,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu writer.println("\t-u $UID \\"); writer.println("\t-e TMPDIR=/work/tmpDir \\"); writer.println("\t-e USERID=$UID \\"); + writer.println("\t--entrypoint /bin/bash \\"); writer.println("\t-w /work \\"); writer.println("\tgoogle/deepvariant:" + binVersion + " \\"); writer.println("\t/work/" + dockerBashScript.getName()); From c522d20e89960242942000c4829ac120825d0cd6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 21 Jan 2024 18:28:05 -0800 Subject: [PATCH 22/45] Bugfix to DeepVariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 148c65013..7ce9fc422 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -204,6 +204,9 @@ private File ensureLocalCopy(File input, File workingDirectory, 
PipelineOutputTr public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutputTracker tracker, String binVersion, List extraArgs) throws PipelineJobException { File workDir = outputGvcf.getParentFile(); + File outputVcf = new File(outputGvcf.getPath().replaceAll(".g.vcf", ".vcf")); + tracker.addIntermediateFile(outputVcf); + tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker); ensureLocalCopy(SequenceUtil.getExpectedIndex(inputBam), workDir, tracker); @@ -221,6 +224,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu bashArgs.add("--ref=/work/" + refFastaLocal.getName()); bashArgs.add("--reads=/work/" + inputBamLocal.getName()); bashArgs.add("--output_gvcf=/work/" + outputGvcf.getName()); + bashArgs.add("--output_vcf=/work/" + outputVcf.getName()); Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); if (maxThreads != null) { From 97ace2f99c06050636e1a51bc14c03ffafb34499 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 21 Jan 2024 21:27:01 -0800 Subject: [PATCH 23/45] Dont set tmpdir for DeepVariant/docker --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 1 - 1 file changed, 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 7ce9fc422..dcd4005af 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -248,7 +248,6 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); - writer.println("\t-e TMPDIR=/work/tmpDir \\"); writer.println("\t-e USERID=$UID 
\\"); writer.println("\t--entrypoint /bin/bash \\"); writer.println("\t-w /work \\"); From e4ddb6170632b1a01e48cef96e28acdbd7affe8b Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Jan 2024 08:02:27 -0800 Subject: [PATCH 24/45] Ensure DeepVariant/docker has FAI index --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index dcd4005af..53874e2e7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -212,7 +212,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu ensureLocalCopy(SequenceUtil.getExpectedIndex(inputBam), workDir, tracker); File refFastaLocal = ensureLocalCopy(refFasta, workDir, tracker); - ensureLocalCopy(new File(refFastaLocal.getPath() + ".fai"), workDir, tracker); + ensureLocalCopy(new File(refFasta.getPath() + ".fai"), workDir, tracker); ensureLocalCopy(new File(FileUtil.getBaseName(refFasta.getPath()) + ".dict"), workDir, tracker); File localBashScript = new File(workDir, "docker.sh"); From 9419bbfdcfd84d7421122dd1c68adeb053200584 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Jan 2024 14:26:34 -0800 Subject: [PATCH 25/45] Support performCleanupAfterEachStep in more places of the UI --- .../web/SequenceAnalysis/panel/AlignmentImportPanel.js | 8 ++++++++ .../web/SequenceAnalysis/panel/SequenceAnalysisPanel.js | 8 ++++++++ .../web/SequenceAnalysis/panel/VariantProcessingPanel.js | 8 ++++++++ .../web/singlecell/panel/SingleCellProcessingPanel.js | 8 ++++++++ 4 files changed, 32 insertions(+) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AlignmentImportPanel.js 
b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AlignmentImportPanel.js index 53132b175..73a786c9d 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AlignmentImportPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AlignmentImportPanel.js @@ -94,6 +94,14 @@ Ext4.define('SequenceAnalysis.panel.AlignmentImportPanel', { inputValue: true, checked: true, xtype: 'checkbox' + },{ + fieldLabel: 'Perform Cleanup After Each Step', + helpPopup: 'Is selected, intermediate files from this job will be deleted after each step, instead of once at the end of the job. This can reduce the working directory size. Note: this will only apply if deleteIntermediateFiles is selected, and this is not supported across every possible pipeline type.', + name: 'performCleanupAfterEachStep', + inputValue: true, + uncheckedValue: false, + checked: true, + xtype: 'checkbox' },{ fieldLabel: 'Treatment of Input Files', xtype: 'combo', diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js index 42e7bec99..bd3a422dd 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js @@ -309,6 +309,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { uncheckedValue: false, checked: true, xtype: 'checkbox' + },{ + fieldLabel: 'Perform Cleanup After Each Step', + helpPopup: 'Is selected, intermediate files from this job will be deleted after each step, instead of once at the end of the job. This can reduce the working directory size. 
Note: this will only apply if deleteIntermediateFiles is selected, and this is not supported across every possible pipeline type.', + name: 'performCleanupAfterEachStep', + inputValue: true, + uncheckedValue: false, + checked: true, + xtype: 'checkbox' },{ fieldLabel: 'Copy Inputs To Working Directory?', helpPopup: 'Check to copy the input files to the working directory. Depending on your environment, this may or may not help performance.', diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantProcessingPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantProcessingPanel.js index c76c39f89..9cf64ff68 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantProcessingPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantProcessingPanel.js @@ -74,6 +74,14 @@ Ext4.define('SequenceAnalysis.panel.VariantProcessingPanel', { inputValue: true, checked: true, xtype: 'checkbox' + },{ + fieldLabel: 'Perform Cleanup After Each Step', + helpPopup: 'Is selected, intermediate files from this job will be deleted after each step, instead of once at the end of the job. This can reduce the working directory size. 
Note: this will only apply if deleteIntermediateFiles is selected, and this is not supported across every possible pipeline type.', + name: 'performCleanupAfterEachStep', + inputValue: true, + uncheckedValue: false, + checked: true, + xtype: 'checkbox' }, this.getSaveTemplateCfg()] }; }, diff --git a/singlecell/resources/web/singlecell/panel/SingleCellProcessingPanel.js b/singlecell/resources/web/singlecell/panel/SingleCellProcessingPanel.js index 52ebeec15..242b90c1f 100644 --- a/singlecell/resources/web/singlecell/panel/SingleCellProcessingPanel.js +++ b/singlecell/resources/web/singlecell/panel/SingleCellProcessingPanel.js @@ -97,6 +97,14 @@ Ext4.define('SingleCell.panel.SingleCellProcessingPanel', { inputValue: true, checked: true, xtype: 'checkbox' + },{ + fieldLabel: 'Perform Cleanup After Each Step', + helpPopup: 'Is selected, intermediate files from this job will be deleted after each step, instead of once at the end of the job. This can reduce the working directory size. Note: this will only apply if deleteIntermediateFiles is selected, and this is not supported across every possible pipeline type.', + name: 'performCleanupAfterEachStep', + inputValue: true, + uncheckedValue: false, + checked: true, + xtype: 'checkbox' }, this.getSaveTemplateCfg()] }; }, From fd99e2ab23db8810b1e5964b096592bd9000d980 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Jan 2024 21:27:56 -0800 Subject: [PATCH 26/45] Add --memory to deepvariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 53874e2e7..8b629afec 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -251,6 
+251,12 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu writer.println("\t-e USERID=$UID \\"); writer.println("\t--entrypoint /bin/bash \\"); writer.println("\t-w /work \\"); + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); + writer.println("\t--memory='" + maxRam + "g' \\"); + } writer.println("\tgoogle/deepvariant:" + binVersion + " \\"); writer.println("\t/work/" + dockerBashScript.getName()); writer.println("EXIT_CODE=$?"); From 300300a233f0809ba0dcbc9cc14cf8384a790a76 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 23 Jan 2024 10:29:01 -0800 Subject: [PATCH 27/45] Add external TMPDIR to deepvariant docker command --- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 8b629afec..08efce26f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -247,6 +247,10 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); + if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) + { + writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); + } writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); writer.println("\t--entrypoint /bin/bash \\"); From f9781345fceccdca41cd4d436c64c8f28e3ae72f Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 23 Jan 2024 21:04:46 -0800 Subject: [PATCH 28/45] Improve resume behavior for CRAM conversion --- 
.../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index f0b4c4aae..bf149d4ff 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1113,7 +1113,14 @@ else if (step.expectToCreateNewBam()) final File cramFile = new File(renamedBam.getParentFile(), FileUtil.getBaseName(renamedBam) + ".cram"); final File cramFileIdx = new File(cramFile.getPath() + ".crai"); Integer threads = SequenceTaskHelper.getMaxThreads(getJob()); - new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads); + if (cramFileIdx.exists()) + { + new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads); + } + else + { + getJob().getLogger().debug("CRAM index already exists, skipping conversion"); + } final File finalBam = renamedBam; final File finalBamIdx = new File(renamedBam.getPath() + ".bai"); From f50e5d813ff9f34cecc737dc03fce1d9b431815b Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 24 Jan 2024 11:01:10 -0800 Subject: [PATCH 29/45] Additional place to allow archived readsets --- .../labkey/singlecell/analysis/AbstractSingleCellHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 7d7c024ba..d2b95a86c 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ 
b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -250,7 +250,7 @@ else if (so.getReadset() == null) throw new PipelineJobException("Readset is blank for loupe file: " + loupeId); } - ctx.getSequenceSupport().cacheReadset(so.getReadset(), ctx.getJob().getUser()); + ctx.getSequenceSupport().cacheReadset(so.getReadset(), ctx.getJob().getUser(), true); } } else From 961ab8c78cef504332cd90404605c19e1168c1e0 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 24 Jan 2024 13:49:10 -0800 Subject: [PATCH 30/45] Correct arguments to BcftoolsFillTagsStep --- .../sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java index 3016cf2c0..8feaa42dd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java @@ -100,6 +100,7 @@ public VariantProcessingStep.Output processVariants(File inputVCF, File outputDi throw new PipelineJobException("No annotations were selected"); } + options.add("--"); options.add("-t"); options.add(StringUtils.join(annotations, ",")); From 9b6e10970ce6f98dcd462fb15cf6202b9980d497 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 29 Jan 2024 19:43:33 -0800 Subject: [PATCH 31/45] Fix typo in CRAM conversion/resume logic --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index bf149d4ff..2dd067144 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1113,7 +1113,7 @@ else if (step.expectToCreateNewBam()) final File cramFile = new File(renamedBam.getParentFile(), FileUtil.getBaseName(renamedBam) + ".cram"); final File cramFileIdx = new File(cramFile.getPath() + ".crai"); Integer threads = SequenceTaskHelper.getMaxThreads(getJob()); - if (cramFileIdx.exists()) + if (!cramFileIdx.exists()) { new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads); } From 92b1f8093814c6e40a1f19fcc61660795f7dab12 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 30 Jan 2024 21:12:25 -0800 Subject: [PATCH 32/45] Add support for glnexus --- .../pipeline/BcftoolsRunner.java | 2 +- .../SequenceAnalysisModule.java | 8 +- .../analysis/DeepVariantHandler.java | 162 ++++++++++ .../analysis/GLNexusHandler.java | 283 ++++++++++++++++++ .../run/analysis/DeepVariantAnalysis.java | 21 +- .../run/util/BgzipRunner.java | 2 +- .../src/org/labkey/cluster/ClusterModule.java | 1 - 7 files changed, 469 insertions(+), 10 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java index 91a6d3937..0767a7471 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java @@ -18,7 +18,7 @@ public BcftoolsRunner(@Nullable Logger logger) super(logger); } - public File getBcfToolsPath() + public static File getBcfToolsPath() { return SequencePipelineService.get().getExeForPackage("BCFTOOLSPATH", "bcftools"); } diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 1d095b04a..950cae97b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -45,6 +45,8 @@ import org.labkey.sequenceanalysis.analysis.BamHaplotypeHandler; import org.labkey.sequenceanalysis.analysis.CombineStarGeneCountsHandler; import org.labkey.sequenceanalysis.analysis.CombineSubreadGeneCountsHandler; +import org.labkey.sequenceanalysis.analysis.DeepVariantHandler; +import org.labkey.sequenceanalysis.analysis.GLNexusHandler; import org.labkey.sequenceanalysis.analysis.GenotypeGVCFHandler; import org.labkey.sequenceanalysis.analysis.HaplotypeCallerHandler; import org.labkey.sequenceanalysis.analysis.LiftoverHandler; @@ -330,6 +332,8 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler()); SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler()); SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler()); + SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); + SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler()); @@ -382,8 +386,8 @@ public void doStartupAfterSpringConfig(ModuleContext moduleContext) LDKService.get().registerQueryButton(new DownloadSraButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); LDKService.get().registerQueryButton(new ArchiveReadsetsButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); - LDKService.get().registerQueryButton(new ChangeReadsetStatusForAnalysesButton(), "sequenceanalysis", 
"sequence_analyses"); - LDKService.get().registerQueryButton(new ChangeReadsetStatusButton(), "sequenceanalysis", "sequence_readsets"); + LDKService.get().registerQueryButton(new ChangeReadsetStatusForAnalysesButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_ANALYSES); + LDKService.get().registerQueryButton(new ChangeReadsetStatusButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); ExperimentService.get().registerExperimentRunTypeSource(new ExperimentRunTypeSource() { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java new file mode 100644 index 000000000..6b3c3d6b8 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java @@ -0,0 +1,162 @@ +package org.labkey.sequenceanalysis.analysis; + +import org.apache.commons.lang3.StringUtils; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.FileType; +import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +/** + * Created by bimber on 
2/3/2016. + */ +public class DeepVariantHandler extends AbstractParameterizedOutputHandler +{ + private final FileType _bamOrCramFileType = new FileType(Arrays.asList("bam", "cram"), "bam"); + + public DeepVariantHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Run DeepVariant", "This will run DeepVariant on the selected BAMs to generate gVCF files.", null, DeepVariantAnalysis.getToolDescriptors()); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && _bamOrCramFileType.isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + public class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + String modelType = ctx.getParams().optString("modelType"); + DeepVariantAnalysis.inferModelType(modelType, ctx); + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + PipelineJob job = ctx.getJob(); + if (inputFiles.size() != 1) + { + throw new PipelineJobException("Expected a single input file"); + } + + SequenceOutputFile so = inputFiles.get(0); + + RecordedAction action = new RecordedAction(getName()); + action.setStartTime(new Date()); + + action.addInput(so.getFile(), "Input BAM File"); + + File outputFile = new File(ctx.getOutputDir(), FileUtil.getBaseName(so.getFile()) + ".g.vcf.gz"); + + DeepVariantAnalysis.DeepVariantWrapper wrapper = new DeepVariantAnalysis.DeepVariantWrapper(job.getLogger()); + wrapper.setOutputDir(ctx.getOutputDir()); + + ReferenceGenome 
referenceGenome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); + if (referenceGenome == null) + { + throw new PipelineJobException("No reference genome found for output: " + so.getRowid()); + } + + String inferredModelType = ctx.getSequenceSupport().getCachedObject("modelType", String.class); + String modelType = inferredModelType == null ? ctx.getParams().optString("modelType") : inferredModelType; + if (modelType == null) + { + throw new PipelineJobException("Missing model type"); + } + + List args = new ArrayList<>(getClientCommandArgs(ctx.getParams())); + args.add("--model_type=" + modelType); + + String binVersion = ctx.getParams().optString("binVersion"); + if (binVersion == null) + { + throw new PipelineJobException("Missing binVersion"); + } + + wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, ctx.getFileManager(), binVersion, args); + + action.addOutput(outputFile, "gVCF File", false); + + SequenceOutputFile o = new SequenceOutputFile(); + o.setName(outputFile.getName()); + o.setFile(outputFile); + o.setLibrary_id(so.getLibrary_id()); + o.setCategory("DeepVariant gVCF File"); + o.setReadset(so.getReadset()); + o.setDescription("DeepVariant Version: " + binVersion); + + ctx.addSequenceOutput(o); + + ctx.addActions(action); + } + + private List getClientCommandArgs(JSONObject params) + { + List ret = new ArrayList<>(); + + for (ToolParameterDescriptor desc : getParameters()) + { + if (desc.getCommandLineParam() != null) + { + String val = params.optString(desc.getName(), null); + if (StringUtils.trimToNull(val) != null) + { + ret.addAll(desc.getCommandLineParam().getArguments(" ", val)); + } + } + } + + return ret; + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + } +} \ No newline at end of file 
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java new file mode 100644 index 000000000..afe54151f --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -0,0 +1,283 @@ +package org.labkey.sequenceanalysis.analysis; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.FileType; +import org.labkey.api.writer.PrintWriters; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.run.util.BgzipRunner; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler.VCF_CATEGORY; + +/** + * Created by bimber on 
2/3/2016. + */ +public class GLNexusHandler extends AbstractParameterizedOutputHandler +{ + protected FileType _gvcfFileType = new FileType(List.of(".g.vcf"), ".g.vcf", false, FileType.gzSupportLevel.SUPPORT_GZ); + + public GLNexusHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Run DeepVariant", "This will run GLNexus on the selected gVCFs.", null, Arrays.asList( + ToolParameterDescriptor.create("binVersion", "GLNexus Version", "The version of GLNexus to run, which is passed to their docker container", "textfield", new JSONObject(){{ + put("allowBlank", false); + }}, "v1.2.7"), + ToolParameterDescriptor.create("fileBaseName", "Filename", "This is the basename that will be used for the output gzipped VCF", "textfield", new JSONObject(){{ + put("allowBlank", false); + }}, "CombinedGenotypes") + )); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + + return o.getFile() != null && _gvcfFileType.isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + @Override + public boolean doSplitJobs() + { + return false; + } + + public class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + Set genomeIds = new HashSet<>(); + for (SequenceOutputFile so : inputFiles) + { + genomeIds.add(so.getLibrary_id()); + } + + if (genomeIds.size() > 1) + { + throw new PipelineJobException("The selected files use more than one genome"); + } + else if (genomeIds.isEmpty()) + { + throw new PipelineJobException("No genome ID found for inputs"); + } + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, 
PipelineJobException + { + RecordedAction action = new RecordedAction(getName()); + action.setStartTime(new Date()); + + Set genomeIds = new HashSet<>(); + List inputVcfs = new ArrayList<>(); + for (SequenceOutputFile so : inputFiles) + { + genomeIds.add(so.getLibrary_id()); + inputVcfs.add(so.getFile()); + action.addInput(so.getFile(), "Input gVCF File"); + } + + if (genomeIds.size() > 1) + { + throw new PipelineJobException("The selected files use more than one genome"); + } + else if (genomeIds.isEmpty()) + { + throw new PipelineJobException("No genome ID found for inputs"); + } + + int genomeId = genomeIds.iterator().next(); + + String basename = StringUtils.trimToNull(ctx.getParams().optString("fileBaseName")); + if (basename == null) + { + throw new PipelineJobException("Basename not supplied for output VCF"); + } + + String binVersion = ctx.getParams().optString("binVersion"); + if (binVersion == null) + { + throw new PipelineJobException("Missing binVersion"); + } + + File outputVcf = new File(ctx.getOutputDir(), basename + ".vcf.gz"); + + new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, outputVcf, ctx.getFileManager(), binVersion); + + ctx.getLogger().debug("adding sequence output: " + outputVcf.getPath()); + SequenceOutputFile so1 = new SequenceOutputFile(); + so1.setName(outputVcf.getName()); + so1.setDescription("GLNexus output. Version: " + binVersion + ". 
Total samples: " + inputFiles.size()); + so1.setFile(outputVcf); + so1.setLibrary_id(genomeId); + so1.setCategory(VCF_CATEGORY); + so1.setContainer(ctx.getJob().getContainerId()); + so1.setCreated(new Date()); + so1.setModified(new Date()); + + ctx.getFileManager().addSequenceOutput(so1); + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + } + + public static class GLNexusWrapper extends AbstractCommandWrapper + { + public GLNexusWrapper(Logger logger) + { + super(logger); + } + + private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + { + try + { + if (workingDirectory.equals(input.getParentFile())) + { + return input; + } + + File local = new File(workingDirectory, input.getName()); + if (!local.exists()) + { + getLogger().debug("Copying file locally: " + input.getPath()); + FileUtils.copyFile(input, local); + } + + output.addIntermediateFile(local); + + return local; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion) throws PipelineJobException + { + File workDir = outputVcf.getParentFile(); + tracker.addIntermediateFile(outputVcf); + tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); + + List gvcfsLocal = new ArrayList<>(); + for (File f : inputGvcfs) + { + gvcfsLocal.add(ensureLocalCopy(f, workDir, tracker)); + ensureLocalCopy(SequenceUtil.getExpectedIndex(f), workDir, tracker); + } + + File localBashScript = new File(workDir, "docker.sh"); + tracker.addIntermediateFile(localBashScript); + + try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) + { + writer.println("#!/bin/bash"); + writer.println("set -x"); + 
writer.println("WD=`pwd`"); + writer.println("HOME=`echo ~/`"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + writer.println("sudo $DOCKER pull quay.io/mlin/glnexus:" + binVersion); + writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("\t-v \"${WD}:/work\" \\"); + writer.println("\t-v \"${HOME}:/homeDir\" \\"); + if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) + { + writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); + } + writer.println("\t-u $UID \\"); + writer.println("\t-e USERID=$UID \\"); + writer.println("\t--entrypoint /bin/bash \\"); + writer.println("\t-w /work \\"); + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + writer.println("\t--memory='" + maxRam + "g' \\"); + } + writer.println("\tquay.io/mlin/glnexus:" + binVersion + " \\"); + + writer.println("\tglnexus_cli" + " \\"); + writer.println("\t--config DeepVariant" + " \\"); + + gvcfsLocal.forEach(f -> { + writer.println("\t-i gvcf=/work/" + f.getPath() + " \\"); + }); + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (maxThreads != null) + { + writer.println("\t--threads " + maxThreads + " \\"); + } + + File bcftools = BcftoolsRunner.getBcfToolsPath(); + File bgzip = BgzipRunner.getExe(); + writer.println("\t| " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + execute(Arrays.asList(localBashScript.getPath())); + + if (!outputVcf.exists()) + { + throw new PipelineJobException("File not found: " + outputVcf.getPath()); + } + + File idxFile = new File(outputVcf.getPath() + ".tbi"); + if (!idxFile.exists()) + { + throw new PipelineJobException("Missing index: " + idxFile.getPath()); + } + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 08efce26f..06121ab4f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -83,15 +83,20 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException throw new PipelineJobException("Missing model type"); } + inferModelType(modelType, getPipelineCtx()); + } + + public static void inferModelType(String modelType, PipelineContext ctx) throws PipelineJobException + { if ("AUTO".equals(modelType)) { - getPipelineCtx().getLogger().info("Inferring model type by readset type:"); - if (support.getCachedReadsets().size() != 1) + ctx.getLogger().info("Inferring model type by readset type:"); + if (ctx.getSequenceSupport().getCachedReadsets().size() != 1) { - throw new PipelineJobException("Expected a single cached readset, found: " + support.getCachedReadsets().size()); + throw new PipelineJobException("Expected a single cached readset, found: " + ctx.getSequenceSupport().getCachedReadsets().size()); } - Readset rs = support.getCachedReadsets().get(0); + Readset rs = ctx.getSequenceSupport().getCachedReadsets().get(0); if ("ILLUMINA".equals(rs.getPlatform())) { switch (rs.getApplication()) @@ -119,7 +124,7 @@ else if ("PACBIO".equals(rs.getPlatform())) throw new PipelineJobException("Unable to infer modelType for: " + rs.getName()); } - support.cacheObject("modelType", modelType); + ctx.getSequenceSupport().cacheObject("modelType", modelType); } } @@ -285,6 +290,12 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu { throw new PipelineJobException("File not found: " + outputGvcf.getPath()); } + + File idxFile = new File(outputGvcf.getPath() + ".tbi"); + if (!idxFile.exists()) + { + throw new PipelineJobException("Missing index: " + idxFile.getPath()); + } } } } diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java index a0867ae5e..136dba7a3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java @@ -95,7 +95,7 @@ private List getParams(File input, boolean preserveInput) return params; } - public File getExe() + public static File getExe() { return SequencePipelineService.get().getExeForPackage("BGZIPPATH", "bgzip"); } diff --git a/cluster/src/org/labkey/cluster/ClusterModule.java b/cluster/src/org/labkey/cluster/ClusterModule.java index 4cf4399ec..e7638ddcc 100644 --- a/cluster/src/org/labkey/cluster/ClusterModule.java +++ b/cluster/src/org/labkey/cluster/ClusterModule.java @@ -45,7 +45,6 @@ import org.labkey.cluster.query.ViewClusterSubmissionsButton; import org.labkey.cluster.query.ViewJavaLogButton; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; From c181b87cf72256dcefd06f0cd3221f590dd9a526 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 30 Jan 2024 21:49:07 -0800 Subject: [PATCH 33/45] Allow folder-level setting of 10x defaults --- .../resources/views/cDNAImport.view.xml | 1 + .../resources/views/poolImport.view.xml | 1 + .../web/singlecell/panel/PoolImportPanel.js | 67 ++++++++++++- .../web/singlecell/panel/TenxSettingsPanel.js | 94 +++++++++++++++++++ .../web/singlecell/panel/cDNAImportPanel.js | 27 +++++- .../singlecell/SingleCellController.java | 63 +++++++++++++ 6 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js diff --git a/singlecell/resources/views/cDNAImport.view.xml b/singlecell/resources/views/cDNAImport.view.xml index 44afd9b9c..14c4dde79 100644 --- a/singlecell/resources/views/cDNAImport.view.xml +++ b/singlecell/resources/views/cDNAImport.view.xml @@ 
-2,6 +2,7 @@ + diff --git a/singlecell/resources/views/poolImport.view.xml b/singlecell/resources/views/poolImport.view.xml index c4a638ccb..3174e4aff 100644 --- a/singlecell/resources/views/poolImport.view.xml +++ b/singlecell/resources/views/poolImport.view.xml @@ -2,6 +2,7 @@ + diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js index ef39125eb..dd0f909ba 100644 --- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js @@ -56,6 +56,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { name: 'assaytype', labels: ['Assay Type', 'Assay Type', 'Assay', 'treatment'], allowRowSpan: false, + alwaysShow: true, allowBlank: false, transform: 'assaytype' },{ @@ -159,11 +160,16 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }, assaytype: function(val, panel) { + var requireAssayType = panel.down('#requireAssayType').getValue(); if (val && (val === '--' || val === '-')) { - val = 'N/A'; + val = null; + } + + if (!requireAssayType && !val) { + return 'N/A'; } - return val || 'N/A'; + return val; }, subject: function(val, panel) { @@ -387,6 +393,28 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }); this.callParent(arguments); + + Ext4.Msg.wait('Loading...'); + LABKEY.Ajax.request({ + method: 'POST', + url: LABKEY.ActionURL.buildURL('singlecell', 'getTenXImportDefaults'), + scope: this, + success: function(response){ + LDK.Utils.decodeHttpResponseJson(response); + if (response.responseJSON){ + this.configDefaults = response.responseJSON; + for (var name in this.configDefaults){ + var item = this.down('#' + name); + if (item){ + item.setValue(this.configDefaults[name]); + } + } + + Ext4.Msg.hide(); + } + }, + failure: LDK.Utils.getErrorCallback() + }); }, getPanelItems: function(){ @@ -466,6 +494,31 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { linkCls: 'labkey-text-link', href: 
LABKEY.ActionURL.buildURL('query', 'executeQuery', Laboratory.Utils.getQueryContainerPath(), {schemaName: 'singlecell', 'query.queryName': 'stim_types'}), style: 'margin-top: 10px;' + }, { + xtype: 'ldk-linkbutton', + width: null, + hidden: !LABKEY.Security.currentUser.isAdmin, + text: 'Set Page Defaults', + itemId: 'copyPrevious', + linkCls: 'labkey-text-link', + scope: this, + handler: function (btn) { + Ext4.create('Ext.window.Window', { + title: 'Set Page Defaults', + items: [{ + xtype: 'singlecell-tenxsettingspanel', + border: false, + hidePageLoadWarning: false, + hideButtons: true + }], + buttons: SingleCell.panel.TenxSettingsPanel.getButtons().concat([{ + text: 'Cancel', + handler: function (btn) { + btn.up('window').close(); + } + }]) + }).show(); + } },{ xtype: 'textfield', style: 'margin-top: 20px;', @@ -506,6 +559,11 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { fieldLabel: 'Require Cite-Seq Library', itemId: 'requireCITE', checked: false + },{ + xtype: 'checkbox', + fieldLabel: 'Require Assay Type', + itemId: 'requireAssayType', + checked: true },{ xtype: 'checkbox', fieldLabel: 'Combine Hashing and Cite-Seq Libraries', @@ -1049,6 +1107,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { var data = []; var missingValues = false; var requireHTO = this.down('#requireHTO').getValue() || (this.down('#requireHashTag') && this.down('#requireHashTag').getValue()); + var requireAssayType = this.down('#requireAssayType').getValue() Ext4.Array.forEach(parsedRows, function(row, rowIdx){ var toAdd = [rowIdx + 1]; Ext4.Array.forEach(colIdxs, function(colIdx){ @@ -1060,6 +1119,10 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { allowBlank = false; } + if (requireAssayType && colDef.name == 'assaytype') { + allowBlank = false; + } + if (allowBlank === false && Ext4.isEmpty(row[propName])){ missingValues = true; toAdd.push('MISSING'); diff --git a/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js 
b/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js new file mode 100644 index 000000000..6940529ff --- /dev/null +++ b/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js @@ -0,0 +1,94 @@ +Ext4.define('SingleCell.panel.TenxSettingsPanel', { + extend: 'Ext.panel.Panel', + alias: 'widget.singlecell-tenxsettingspanel', + + hidePageLoadWarning: true, + hideButtons: false, + maxWidth: 650, + + initComponent: function(){ + Ext4.applyIf(this, { + bodyStyle: 'padding: 5px;', + items: [{ + html: 'Loading...', + border: false + }], + buttons: this.hideButtons ? null : SingleCell.panel.TenxSettingsPanel.getButtons() + }); + + this.callParent(arguments); + + LABKEY.Ajax.request({ + method: 'POST', + url: LABKEY.ActionURL.buildURL('singlecell', 'getTenXImportDefaults'), + scope: this, + success: this.onDataLoad, + failure: LDK.Utils.getErrorCallback() + }); + }, + + onDataLoad: function(response){ + LDK.Utils.decodeHttpResponseJson(response); + this.removeAll(); + + if (response.responseJSON){ + var configDefaults = response.responseJSON; + var items = [{ + html: 'Note: you must reload this page before any change will be applied.', + border: false, + hidden: !!this.hidePageLoadWarning + },{ + xtype: 'checkbox', + fieldLabel: 'Require Assay Type', + labelWidth: 300, + itemId: 'requireAssayType', + checked: !!JSON.parse(configDefaults.requireAssayType) + },{ + xtype: 'checkbox', + fieldLabel: 'Combine Hashing and Cite-Seq', + labelWidth: 300, itemId: 'combineHashingCite', + checked: !!JSON.parse(configDefaults.combineHashingCite) + }]; + + this.add(items); + } + else { + this.add({html: 'Something went wrong loading saved data'}); + } + }, + + statics: { + getButtons: function () { + return [{ + text: 'Submit', + handler: function (btn) { + var win = btn.up('window'); + var panel = win ? 
win.down('singlecell-tenxsettingspanel') : btn.up('singlecell-tenxsettingspanel'); + + var params = {}; + params['requireAssayType'] = panel.down('#requireAssayType').getValue(); + params['combineHashingCite'] = panel.down('#combineHashingCite').getValue(); + + Ext4.Msg.wait('Saving...'); + LABKEY.Ajax.request({ + method: 'POST', + url: LABKEY.ActionURL.buildURL('singlecell', 'setTenXImportDefaults'), + jsonData: params, + scope: panel, + success: panel.onSuccess, + failure: LDK.Utils.getErrorCallback() + }) + } + }]; + } + }, + + onSuccess: function(){ + Ext4.Msg.hide(); + Ext4.Msg.alert('Success', 'Settings have been saved'); + + if (this.up('window')){ + this.up('window').close(); + } + } +}); \ No newline at end of file diff --git a/singlecell/resources/web/singlecell/panel/cDNAImportPanel.js b/singlecell/resources/web/singlecell/panel/cDNAImportPanel.js index 427484cbd..a02351135 100644 --- a/singlecell/resources/web/singlecell/panel/cDNAImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/cDNAImportPanel.js @@ -96,7 +96,32 @@ Ext4.define('SingleCell.panel.cDNAImportPanel', { scope: this, href: LABKEY.ActionURL.getContextPath() + '/singlecell/exampleData/ImportReadsetTemplate.xlsx' }] - }, { + },{ + xtype: 'ldk-linkbutton', + hidden: !LABKEY.Security.currentUser.isAdmin, + style: 'margin-top: 10px;', + text: 'Set Page Defaults', + itemId: 'copyPrevious', + linkCls: 'labkey-text-link', + scope: this, + handler: function (btn) { + Ext4.create('Ext.window.Window', { + title: 'Set Page Defaults', + items: [{ + xtype: 'singlecell-tenxsettingspanel', + border: false, + hidePageLoadWarning: false, + hideButtons: true + }], + buttons: SingleCell.panel.TenxSettingsPanel.getButtons().concat([{ + text: 'Cancel', + handler: function (btn) { + btn.up('window').close(); + } + }]) + }).show(); + } + },{ xtype: 'textfield', style: 'margin-top: 20px;', fieldLabel: 'Expt Number', diff --git a/singlecell/src/org/labkey/singlecell/SingleCellController.java 
b/singlecell/src/org/labkey/singlecell/SingleCellController.java index 160240070..64f9508d5 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellController.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellController.java @@ -21,6 +21,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONArray; +import org.labkey.api.action.ApiResponse; import org.labkey.api.action.ApiSimpleResponse; import org.labkey.api.action.ApiUsageException; import org.labkey.api.action.ExportAction; @@ -35,6 +36,7 @@ import org.labkey.api.data.ContainerManager; import org.labkey.api.data.ContainerType; import org.labkey.api.data.DbScope; +import org.labkey.api.data.PropertyManager; import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; @@ -47,6 +49,7 @@ import org.labkey.api.query.UserSchema; import org.labkey.api.security.RequiresPermission; import org.labkey.api.security.User; +import org.labkey.api.security.permissions.AdminPermission; import org.labkey.api.security.permissions.InsertPermission; import org.labkey.api.security.permissions.ReadPermission; import org.labkey.api.sequenceanalysis.SequenceOutputFile; @@ -494,4 +497,64 @@ public void addNavTrail(NavTree tree) } } + public final static String CONFIG_PROPERTY_DOMAIN_IMPORT = "org.labkey.singlecell.importsettings"; + + @RequiresPermission(ReadPermission.class) + public static class GetTenXImportDefaultsAction extends ReadOnlyApiAction + { + @Override + public ApiResponse execute(Object form, BindException errors) throws Exception + { + Container target = getContainer().isWorkbook() ? 
getContainer().getParent() : getContainer(); + Map resultProperties = new HashMap<>(PropertyManager.getProperties(target, CONFIG_PROPERTY_DOMAIN_IMPORT)); + + return new ApiSimpleResponse(resultProperties); + } + } + + @RequiresPermission(AdminPermission.class) + public static class SetTenXImportDefaultsAction extends MutatingApiAction + { + public static final String REQUIRE_ASSAY_TYPE = "requireAssayType"; + public static final String COMBINE_HASHING_CITE = "combineHashingCite"; + + @Override + public ApiResponse execute(SetSequenceImportDefaultsForm form, BindException errors) throws Exception + { + Container target = getContainer().isWorkbook() ? getContainer().getParent() : getContainer(); + PropertyManager.PropertyMap configMap = PropertyManager.getWritableProperties(target, CONFIG_PROPERTY_DOMAIN_IMPORT, true); + configMap.put(REQUIRE_ASSAY_TYPE, Boolean.valueOf(form.isRequireAssayType()).toString()); + configMap.put(COMBINE_HASHING_CITE, Boolean.valueOf(form.isCombineHashingCite()).toString()); + + configMap.save(); + + return new ApiSimpleResponse("success", true); + } + } + + public static class SetSequenceImportDefaultsForm + { + private boolean _requireAssayType = false; + private boolean _combineHashingCite = false; + + public boolean isRequireAssayType() + { + return _requireAssayType; + } + + public void setRequireAssayType(boolean requireAssayType) + { + _requireAssayType = requireAssayType; + } + + public boolean isCombineHashingCite() + { + return _combineHashingCite; + } + + public void setCombineHashingCite(boolean combineHashingCite) + { + _combineHashingCite = combineHashingCite; + } + } } From abaee3dd0e4f84c1581c96c496e44e361630f1db Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 30 Jan 2024 22:20:25 -0800 Subject: [PATCH 34/45] Support ensureSamplesShareAllGenomes for AppendNimble --- singlecell/resources/chunks/AppendNimble.R | 2 +- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 4 ++++ 2 files changed, 5 insertions(+), 1 
deletion(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 57fbb6f6a..2dc826d10 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) } saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 111c2be2d..bdfe7cb04 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -36,6 +36,10 @@ public Provider() {{ put("check", false); }}, false, null, true), + SeuratToolParameter.create("ensureSamplesShareAllGenomes", "Ensure Samples Share All Genomes", "If checked, the job will fail unless nimble data is found for each requested genome for all samples", "checkbox", new JSONObject() + {{ + put("check", true); + }}, true, null, true), SeuratToolParameter.create("maxLibrarySizeRatio", "Max Library Size Ratio", "This normalization relies on the assumption that the library size of the assay being normalized in negligible relative to the assayForLibrarySize. 
To verify this holds true, the method will error if librarySize(assayToNormalize)/librarySize(assayForLibrarySize) exceeds this value", "ldk-numberfield", new JSONObject() {{ put("decimalPrecision", 4); From 8c7216b3fc8b205c9a21676b73c1c78909a89087 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 06:39:58 -0800 Subject: [PATCH 35/45] Bugfix to 10x setting panel when values are null --- .../resources/web/singlecell/panel/TenxSettingsPanel.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js b/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js index 6940529ff..f0a826475 100644 --- a/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js +++ b/singlecell/resources/web/singlecell/panel/TenxSettingsPanel.js @@ -42,12 +42,12 @@ Ext4.define('SingleCell.panel.TenxSettingsPanel', { fieldLabel: 'Require Assay Type', labelWidth: 300, itemId: 'requireAssayType', - checked: !!JSON.parse(configDefaults.requireAssayType) + checked: !!JSON.parse(configDefaults.requireAssayType ?? false) },{ xtype: 'checkbox', fieldLabel: 'Combine Hashing and Cite-Seq', labelWidth: 300, itemId: 'combineHashingCite', - checked: !!JSON.parse(configDefaults.combineHashingCite) + checked: !!JSON.parse(configDefaults.combineHashingCite ?? 
false) }]; this.add(items); From e3db53eb3b268df580173f844daebc0f2477272e Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 08:39:53 -0800 Subject: [PATCH 36/45] Correct name of GLNexus step --- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index afe54151f..e3ba68f78 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -44,7 +44,7 @@ public class GLNexusHandler extends AbstractParameterizedOutputHandler Date: Wed, 31 Jan 2024 09:37:29 -0800 Subject: [PATCH 37/45] Update gVCF index locator in GLNexus step --- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index e3ba68f78..8a6620566 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -212,7 +212,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker for (File f : inputGvcfs) { gvcfsLocal.add(ensureLocalCopy(f, workDir, tracker)); - ensureLocalCopy(SequenceUtil.getExpectedIndex(f), workDir, tracker); + ensureLocalCopy(new File(f.getPath() + ".tbi"), workDir, tracker); } File localBashScript = new File(workDir, "docker.sh"); From 81193035b6ffd152cb82ab587c6f1dcc8ee196e2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 11:28:09 -0800 Subject: [PATCH 38/45] Add calculated field and code to catch HTO libraries with single HTO --- 
.../web/singlecell/panel/LibraryExportPanel.js | 13 ++++++++++++- .../singlecell/SingleCellTableCustomizer.java | 12 ++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js b/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js index e367fe34c..078b40988 100644 --- a/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js +++ b/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js @@ -634,7 +634,7 @@ Ext4.define('SingleCell.panel.LibraryExportPanel', { ',readsetId,readsetId/name,readsetId/application,readsetId/librarytype,readsetId/barcode5,readsetId/barcode5/sequence,readsetId/barcode3,readsetId/barcode3/sequence,readsetId/totalFiles,readsetId/concentration' + ',tcrReadsetId,tcrReadsetId/name,tcrReadsetId/application,tcrReadsetId/librarytype,tcrReadsetId/barcode5,tcrReadsetId/barcode5/sequence,tcrReadsetId/barcode3,tcrReadsetId/barcode3/sequence,tcrReadsetId/totalFiles,tcrReadsetId/concentration' + ',hashingReadsetId,hashingReadsetId/name,hashingReadsetId/application,hashingReadsetId/librarytype,hashingReadsetId/barcode5,hashingReadsetId/barcode5/sequence,hashingReadsetId/barcode3,hashingReadsetId/barcode3/sequence,hashingReadsetId/totalFiles,hashingReadsetId/concentration' + - ',citeseqReadsetId,citeseqReadsetId/name,citeseqReadsetId/application,citeseqReadsetId/librarytype,citeseqReadsetId/barcode5,citeseqReadsetId/barcode5/sequence,citeseqReadsetId/barcode3,citeseqReadsetId/barcode3/sequence,citeseqReadsetId/totalFiles,citeseqReadsetId/concentration', + ',citeseqReadsetId,citeseqReadsetId/name,citeseqReadsetId/application,citeseqReadsetId/librarytype,citeseqReadsetId/barcode5,citeseqReadsetId/barcode5/sequence,citeseqReadsetId/barcode3,citeseqReadsetId/barcode3/sequence,citeseqReadsetId/totalFiles,citeseqReadsetId/concentration,uniqueHtos', scope: this, filterArray: [LABKEY.Filter.create('plateId', plateIds.join(';'), LABKEY.Filter.Types.IN)], 
failure: LDK.Utils.getErrorCallback(), @@ -661,6 +661,7 @@ Ext4.define('SingleCell.panel.LibraryExportPanel', { if (expectedPairs) { sortedRows = []; var missingRows = []; + var errorMsgs = []; Ext4.Array.forEach(expectedPairs, function(p){ var found = false; Ext4.Array.forEach(results.rows, function(row){ @@ -679,6 +680,11 @@ Ext4.define('SingleCell.panel.LibraryExportPanel', { if (row['hashingReadsetId'] && row['hashingReadsetId/application'] && row['hashingReadsetId/application'] === 'Cell Hashing') { sortedRows.push(Ext4.apply({targetApplication: '10x HTO', laneAssignment: (p.length > 2 ? p[2] : null), plateAlias: (p.length > 3 ? p[3] : null)}, row)); found = true; + + if (row.uniqueHtos <=1) { + errorMsgs.push(row['hashingReadsetId/name'] + ': only ' + row.uniqueHtos + ' present') + } + return false; } } @@ -713,6 +719,11 @@ Ext4.define('SingleCell.panel.LibraryExportPanel', { Ext4.Msg.alert('Error', 'The following plates were not found:
' + missingRows.join('
')); return; } + + if (errorMsgs.length){ + Ext4.Msg.alert('Error', 'The following lanes had HTO libraries, without multiple HTOs:
' + errorMsgs.join('
')); + return; + } } var barcodes = 'Illumina'; diff --git a/singlecell/src/org/labkey/singlecell/SingleCellTableCustomizer.java b/singlecell/src/org/labkey/singlecell/SingleCellTableCustomizer.java index 8b578f61a..4d1776473 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellTableCustomizer.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellTableCustomizer.java @@ -94,6 +94,18 @@ private void customizeCdnas(AbstractTableInfo ti) } LDKService.get().applyNaturalSort(ti, "plateId"); + + String uniqueHTOs = "uniqueHtos"; + if (ti.getColumn(uniqueHTOs) == null) + { + SQLFragment sql = new SQLFragment("(SELECT count(DISTINCT s.hto) as expr FROM " + SingleCellSchema.NAME + "." + SingleCellSchema.TABLE_SORTS + " s WHERE s.rowid IN (SELECT DISTINCT sortId FROM " + SingleCellSchema.NAME + "." + SingleCellSchema.TABLE_CDNAS + " c WHERE c.plateid = " + ExprColumn.STR_TABLE_ALIAS + ".plateid AND c.container = " + ExprColumn.STR_TABLE_ALIAS + ".container))"); + ExprColumn newCol = new ExprColumn(ti, uniqueHTOs, sql, JdbcType.INTEGER, ti.getColumn("plateId"), ti.getColumn("container")); + newCol.setLabel("Distinct HTOs In Lane"); + DetailsURL details = DetailsURL.fromString("/query/executeQuery.view?schemaName=singlecell&query.queryName=cdna_libraries&query.plateId~eq=${plateId}", ti.getUserSchema().getContainer()); + newCol.setURL(details); + + ti.addColumn(newCol); + } } private void customizeSorts(AbstractTableInfo ti) From 04a31f41ccfcd35c0ecf8aed6670990a012474c3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 13:03:28 -0800 Subject: [PATCH 39/45] Update URLs for hashing/cite-seq feature counts --- singlecell/resources/views/singleCellDataManagement.html | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index bba369ea0..6be435912 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ 
b/singlecell/resources/views/singleCellDataManagement.html @@ -28,8 +28,11 @@ name: '10x VDJ Libraries Needing Alignment', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.totalAlignments~eq': 0, 'query.totalForwardReads~isnonblank': null, 'query.application~contains': 'single', 'query.status~isblank': null, 'query.isArchived~eq': 0, 'query.librarytype~doesnotcontain': 'BCR', 'query.librarytype~contains': 'VDJ', 'query.sort': 'name'}) },{ - name: '10x Hashing/Cite-seq Libraries Needing Feature Counts', - url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~containsoneof': 'Cell Hashing;CITE-Seq', 'query.totalForwardReads~isnonblank': null, 'query.totalOutputs~eq': 0, 'query.status~isblank': null, 'query.sort': 'name'}) + name: 'Hashing Libraries Needing Feature Counts', + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'Cell Hashing', 'query.totalForwardReads~isnonblank': null, 'query.readset/outputFileTypes~doesnotcontain': 'Cell Hashing Counts', 'query.status~isblank': null, 'query.sort': 'name'}) + },{ + name: 'Cite-seq Libraries Needing Feature Counts', + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'CITE-Seq', 'query.totalForwardReads~isnonblank': null, 'query.readset/outputFileTypes~doesnotcontain': 'CITE-seq Counts', 'query.status~isblank': null, 'query.sort': 'name'}) },{ name: 'VLoupe Files Needing TCR Import', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'outputfiles', 'query.readset/status~isblank': null, 'query.readset/numTcrResults~eq': 0, 'query.category~eq': '10x VLoupe', 'query.sort': 
'readset/name'}) From db938528060e7df89b419bd6bf5d32a4b820141e Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 15:54:11 -0800 Subject: [PATCH 40/45] Add feature to auto-split cell/lane for 10x import --- .../web/singlecell/panel/PoolImportPanel.js | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js index dd0f909ba..8f7855897 100644 --- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js @@ -606,6 +606,12 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { fieldLabel: 'Use 10x V2/HT (Dual Index)', itemId: 'useDualIndex', checked: true + },{ + xtype: 'checkbox', + fieldLabel: '# Cells Indicates Total Per Lane', + helpPopup: '', + itemId: 'cellsReportedAsTotalPerLane', + checked: true },{ + xtype: 'checkbox', fieldLabel: 'Use MS (Dual Index)', @@ -822,6 +828,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { data[col.name] = cell; + // This indicates that the first row from the plateId has a value for cells, but this does not. if (!cell && col.name === 'cells' && lastValueByCol[colIdx]) { doSplitCellsByPool = true; } @@ -832,7 +839,8 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }, this); //split cells across rows - if (doSplitCellsByPool) { + var cellsReportedAsTotalPerLane = this.down('#cellsReportedAsTotalPerLane').getValue(); + if (cellsReportedAsTotalPerLane || doSplitCellsByPool) { var cellCountMap = {}; Ext4.Array.forEach(ret, function(data) { if (data.plateId) { @@ -844,8 +852,18 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { Ext4.Array.forEach(Ext4.Object.getKeys(cellCountMap), function(plateId) { var arr = cellCountMap[plateId]; var size = arr.length; + + // Two allowable patterns: + // 1) the first row has a value and rest are blank. 
Take this as the lane total + // 2) all rows have the same value, so take the first as the lane total arr = Ext4.Array.remove(arr, null); arr = Ext4.Array.remove(arr, ''); + + // Only attempt to collapse if this was selected: + if (cellsReportedAsTotalPerLane) { + arr = Ext4.unique(arr); + } + if (arr.length === 1) { cellCountMap[plateId] = arr[0] / size; } From 2407a002355ab2948c39836125c04ac6e877dff2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 16:56:49 -0800 Subject: [PATCH 41/45] Fix docker args in GLNexusHandler --- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 +- singlecell/resources/views/singleCellDataManagement.html | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 8a6620566..d9e323c5f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -266,7 +266,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker throw new PipelineJobException(e); } - execute(Arrays.asList(localBashScript.getPath())); + execute(Arrays.asList("/bin/bash", localBashScript.getPath())); if (!outputVcf.exists()) { diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index 6be435912..7e4f2835d 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -42,6 +42,9 @@ },{ name: 'Readsets Assigned to Run Lacking Data', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.viewName': 'Assigned to Run Lacking Data'}) + },{ + name: 'cDNA Libraries with Hashing and Single HTO/Lane', + url: 
LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'singlecell', queryName: 'cdna_libraries', 'query.uniqueHtos~in': '0;1', 'query.hashingReadsetId/name~isnonblank': null, 'query.hashingReadsetId/totalFiles~eq': 0}) },{ name: 'Analyses In Novogene-Related Workbook', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_analyses', 'query.container/title~containsoneof': 'Novogene;shipment'}) From 00885c75427e0a861a61f175ea86ff307ee156e5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Jan 2024 18:23:23 -0800 Subject: [PATCH 42/45] Fix docker args in GLNexusHandler --- .../labkey/sequenceanalysis/analysis/GLNexusHandler.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index d9e323c5f..e7b5ab587 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -235,8 +235,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker } writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); + Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { @@ -244,11 +243,10 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker } writer.println("\tquay.io/mlin/glnexus:" + binVersion + " \\"); - writer.println("\tglnexus_cli" + " \\"); writer.println("\t--config DeepVariant" + " \\"); gvcfsLocal.forEach(f -> { - writer.println("\t-i gvcf=/work/" + f.getPath() + " \\"); + writer.println("\t-i gvcf=/work/" + f.getName() + " \\"); }); Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); From 
594ad2c077d09e33c5e991e048b999c3407ab623 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 1 Feb 2024 10:52:14 -0800 Subject: [PATCH 43/45] Update location of genome symlinks in mGAP --- singlecell/resources/views/singleCellDataManagement.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index 7e4f2835d..301d70982 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -29,10 +29,10 @@ url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.totalAlignments~eq': 0, 'query.totalForwardReads~isnonblank': null, 'query.application~contains': 'single', 'query.status~isblank': null, 'query.isArchived~eq': 0, 'query.librarytype~doesnotcontain': 'BCR', 'query.librarytype~contains': 'VDJ', 'query.sort': 'name'}) },{ name: 'Hashing Libraries Needing Feature Counts', - url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'Cell Hashing', 'query.totalForwardReads~isnonblank': null, 'query.readset/outputFileTypes~doesnotcontain': 'Cell Hashing Counts', 'query.status~isblank': null, 'query.sort': 'name'}) + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'Cell Hashing', 'query.totalForwardReads~isnonblank': null, 'query.outputFileTypes~doesnotcontain': 'Cell Hashing Counts', 'query.status~isblank': null, 'query.sort': 'name'}) },{ name: 'Cite-seq Libraries Needing Feature Counts', - url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'CITE-Seq', 
'query.totalForwardReads~isnonblank': null, 'query.readset/outputFileTypes~doesnotcontain': 'CITE-seq Counts', 'query.status~isblank': null, 'query.sort': 'name'}) + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'sequence_readsets', 'query.application~eq': 'CITE-Seq', 'query.totalForwardReads~isnonblank': null, 'query.outputFileTypes~doesnotcontain': 'CITE-seq Counts', 'query.status~isblank': null, 'query.sort': 'name'}) },{ name: 'VLoupe Files Needing TCR Import', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {schemaName: 'sequenceanalysis', queryName: 'outputfiles', 'query.readset/status~isblank': null, 'query.readset/numTcrResults~eq': 0, 'query.category~eq': '10x VLoupe', 'query.sort': 'readset/name'}) From 26471accab98359aa4afa5fd0e4e24c1677d2c53 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 1 Feb 2024 12:37:22 -0800 Subject: [PATCH 44/45] Update glnexus arguments to match newest version --- .../analysis/GLNexusHandler.java | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index e7b5ab587..54d3934a4 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -48,6 +48,15 @@ public GLNexusHandler() ToolParameterDescriptor.create("binVersion", "GLNexus Version", "The version of GLNexus to run, which is passed to their docker container", "textfield", new JSONObject(){{ put("allowBlank", false); }}, "v1.2.7"), + ToolParameterDescriptor.create("configType", "Config Type", "This is passed to the --config argument of GLNexus.", "ldk-simplecombo", new JSONObject() + {{ + put("multiSelect", false); + put("allowBlank", false); + put("storeValues", 
"gatk;DeepVariant;DeepVariantWGS;DeepVariantWES"); + put("initialValues", "DeepVariant"); + put("delimiter", ";"); + put("joinReturnValue", true); + }}, null), ToolParameterDescriptor.create("fileBaseName", "Filename", "This is the basename that will be used for the output gzipped VCF", "textfield", new JSONObject(){{ put("allowBlank", false); }}, "CombinedGenotypes") @@ -144,9 +153,15 @@ else if (genomeIds.isEmpty()) throw new PipelineJobException("Missing binVersion"); } + String configType = ctx.getParams().optString("configType", "DeepVariant"); + if (configType == null) + { + throw new PipelineJobException("Missing configType"); + } + File outputVcf = new File(ctx.getOutputDir(), basename + ".vcf.gz"); - new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, outputVcf, ctx.getFileManager(), binVersion); + new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, outputVcf, ctx.getFileManager(), binVersion, configType); ctx.getLogger().debug("adding sequence output: " + outputVcf.getPath()); SequenceOutputFile so1 = new SequenceOutputFile(); @@ -202,7 +217,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTr } } - public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion) throws PipelineJobException + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType) throws PipelineJobException { File workDir = outputVcf.getParentFile(); tracker.addIntermediateFile(outputVcf); @@ -242,12 +257,15 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker writer.println("\t--memory='" + maxRam + "g' \\"); } writer.println("\tquay.io/mlin/glnexus:" + binVersion + " \\"); + writer.println("\tglnexus_cli \\"); + writer.println("\t--config " + configType + " \\"); - writer.println("\t--config DeepVariant" + " \\"); + writer.println("\t--trim-uncalled-alleles \\"); - gvcfsLocal.forEach(f -> { - writer.println("\t-i 
gvcf=/work/" + f.getName() + " \\"); - }); + if (maxRam != null) + { + writer.println("\t--mem-gbytes " + maxRam + "\\"); + } Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); if (maxThreads != null) @@ -255,9 +273,21 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker writer.println("\t--threads " + maxThreads + " \\"); } + gvcfsLocal.forEach(f -> { + writer.println("\t/work/" + f.getName() + " \\"); + }); + File bcftools = BcftoolsRunner.getBcfToolsPath(); File bgzip = BgzipRunner.getExe(); writer.println("\t| " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); + + // Command will fail if this exists: + File dbDir = new File (outputVcf.getParentFile(), "GLnexus.DB"); + + if (dbDir.exists()) + { + FileUtils.deleteDirectory(dbDir); + } } catch (IOException e) { From fcc8b13db77e06eb45c00f3ff1d4c3bb2b6dca32 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 1 Feb 2024 14:01:51 -0800 Subject: [PATCH 45/45] Set workDir for DeepVariant/GLNexus --- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 +- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 54d3934a4..3cb14356d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -283,7 +283,6 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker // Command will fail if this exists: File dbDir = new File (outputVcf.getParentFile(), "GLnexus.DB"); - if (dbDir.exists()) { FileUtils.deleteDirectory(dbDir); @@ -294,6 +293,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker throw new PipelineJobException(e); } + 
setWorkingDir(workDir); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); if (!outputVcf.exists()) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 06121ab4f..06a5c1b96 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -284,6 +284,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu throw new PipelineJobException(e); } + setWorkingDir(workDir); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); if (!outputGvcf.exists())