LabKey · bbimber · Dec 28, 2021 · Dec 23, 2021 · Dec 23, 2021 · Dec 23, 2021
diff --git a/.gitignore b/.gitignore
@@ -6,5 +6,17 @@
 /mcc/resources/views/gen
 /mcc/resources/web/mcc/gen
 
+elispot_assay/resources/credits/dependencies.txt
+elispot_assay/resources/credits/jars.txt
+
+mcc/resources/credits/dependencies.txt
+mcc/resources/credits/jars.txt
+
+mGAP/resources/credits/dependencies.txt
+mGAP/resources/credits/jars.txt
+
+tcrdb/resources/credits/dependencies.txt
+tcrdb/resources/credits/jars.txt
+
 variantdb/resources/credits/dependencies.txt
 variantdb/resources/credits/jars.txt
diff --git a/mGAP/resources/etls/prime-seq.xml b/mGAP/resources/etls/prime-seq.xml
@@ -91,6 +91,8 @@
                     <column>liftedVcfId/dataid/DataFileUrl</column>
                     <column>liftedVcfId/name</column>
                     <column>liftedVcfId/library_id/name</column>
+                    <column>sitesOnlyVcfId/dataid/DataFileUrl</column>
+                    <column>sitesOnlyVcfId/name</column>
                     <column>humanJbrowseId</column>
                     <column>objectId</column>
                 </sourceColumns>
@@ -99,6 +101,7 @@
                 <columnTransforms>
                     <column source="genomeId/name" target="genomeId" transformClass="org.labkey.mgap.columnTransforms.GenomeTransform" />
                     <column source="vcfId/dataid/DataFileUrl" target="vcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
+                    <column source="sitesOnlyVcfId/dataid/DataFileUrl" target="sitesOnlyVcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
                     <column source="jbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseSessionTransform"/>
                     <column source="liftedVcfId/dataid/DataFileUrl" target="liftedVcfId" transformClass="org.labkey.mgap.columnTransforms.LiftedVcfTransform" />
                     <column source="humanJbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseHumanSessionTransform"/>

diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
@@ -0,0 +1 @@
+ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;
diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
@@ -0,0 +1 @@
+ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;
diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
@@ -149,6 +149,15 @@
                     <fkColumnName>rowid</fkColumnName>
                 </fk>
             </column>
+            <column columnName="sitesOnlyVcfId">
+                <columnTitle>Sites-Only VCF File</columnTitle>
+                <nullable>true</nullable>
+                <fk>
+                    <fkDbSchema>sequenceanalysis</fkDbSchema>
+                    <fkTable>outputfiles</fkTable>
+                    <fkColumnName>rowid</fkColumnName>
+                </fk>
+            </column>
             <column columnName="humanJbrowseId">
                 <columnTitle>Genome Browser (Human)</columnTitle>
                 <nullable>true</nullable>

diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java
@@ -59,7 +59,7 @@ public String getName()
     @Override
     public Double getSchemaVersion()
     {
-        return 16.59;
+        return 16.60;
     }
 
     @Override

diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
@@ -266,6 +266,7 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<Sequ
             Map<String, SequenceOutputFile> outputVCFMap = new HashMap<>();
             Map<String, SequenceOutputFile> outputTableMap = new HashMap<>();
             Map<String, SequenceOutputFile> liftedVcfMap = new HashMap<>();
+            Map<String, SequenceOutputFile> sitesOnlyVcfMap = new HashMap<>();
             Map<String, SequenceOutputFile> trackVCFMap = new HashMap<>();
 
             for (SequenceOutputFile so : outputsCreated)
@@ -285,6 +286,11 @@ else if (so.getCategory().contains("Lifted"))
                     String name = so.getName().replaceAll(" Lifted to Human", "");
                     liftedVcfMap.put(name, so);
                 }
+                else if (so.getCategory().contains("mGAP Release: Sites Only"))
+                {
+                    String name = so.getName().replaceAll(": Sites Only", "");
+                    sitesOnlyVcfMap.put(name, so);
+                }
                 else if (so.getCategory().endsWith("Release"))
                 {
                     outputVCFMap.put(so.getName(), so);
@@ -328,6 +334,12 @@ else if (so.getCategory().endsWith("Release Track"))
                     throw new PipelineJobException("Unable to find lifted VCF for release: " + release);
                 }
 
+                SequenceOutputFile sitesOnlyVcf = sitesOnlyVcfMap.get(release);
+                if (sitesOnlyVcf == null)
+                {
+                    throw new PipelineJobException("Unable to find sites-only VCF for release: " + release);
+                }
+
                 //find basic stats:
                 job.getLogger().info("inspecting file: " + so.getName());
                 int totalSubjects;
@@ -391,6 +403,7 @@ else if (so.getCategory().endsWith("Release Track"))
                 row.put("releaseDate", new Date());
                 row.put("vcfId", so.getRowid());
                 row.put("liftedVcfId", liftedVcf.getRowid());
+                row.put("sitesOnlyVcfId", sitesOnlyVcf.getRowid());
                 row.put("variantTable", so2.getRowid());
                 row.put("genomeId", so.getLibrary_id());
                 row.put("totalSubjects", totalSubjects);
@@ -908,8 +921,12 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, ReferenceGenome s
                 wrapper.execute(sourceGenome.getWorkingFastaFile(), primaryTrackVcf, noGenotypes, Arrays.asList("--sites-only-vcf-output"));
             }
 
-            ctx.getFileManager().addIntermediateFile(noGenotypes);
-            ctx.getFileManager().addIntermediateFile(new File(noGenotypes.getPath() + ".tbi"));
+            SequenceOutputFile output = new SequenceOutputFile();
+            output.setFile(noGenotypes);
+            output.setName(primaryTrackVcf.getName() + ": Sites Only");
+            output.setCategory("mGAP Release: Sites Only");
+            output.setLibrary_id(sourceGenome.getGenomeId());
+            ctx.getFileManager().addSequenceOutput(output);
 
             //lift to target genome
             Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class);

diff --git a/tcrdb/resources/assay/TCRdb/domains/run.xml b/tcrdb/resources/assay/TCRdb/domains/run.xml
@@ -23,12 +23,6 @@
         <exp:RangeURI>http://www.w3.org/2001/XMLSchema#multiLine</exp:RangeURI>
         <exp:Label>Comments</exp:Label>
     </exp:PropertyDescriptor>
-    <exp:PropertyDescriptor>
-        <exp:Name>assayId</exp:Name>
-        <exp:Required>false</exp:Required>
-        <exp:RangeURI>http://www.w3.org/2001/XMLSchema#int</exp:RangeURI>
-        <exp:Label>Assay Id</exp:Label>
-    </exp:PropertyDescriptor>
     <exp:PropertyDescriptor>
         <exp:Name>performedBy</exp:Name>
         <exp:Required>false</exp:Required>

diff --git a/tcrdb/src/org/labkey/tcrdb/TCRdbController.java b/tcrdb/src/org/labkey/tcrdb/TCRdbController.java
@@ -706,6 +706,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
                     imputedSequences.append(rs.getString(FieldKey.fromString("sequence"))).append("\n");
                 }
 
+                // This applies to MiXCR data: only rows that have both a cloneId and a clonesFile are handled here
                 if (rs.getObject(FieldKey.fromString("cloneId")) != null && rs.getObject(FieldKey.fromString("clonesFile")) != null)
                 {
                     Integer key = rs.getInt(FieldKey.fromString("clonesFile"));
@@ -714,7 +715,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
 
                     clnaToCloneMap.put(key, set);
 
-                    clnaToCDR3Map.put(key.toString() + "_" + rs.getString(FieldKey.fromString("cloneId")), rs.getString(FieldKey.fromString("cdr3")));
+                    clnaToCDR3Map.put(key + "_" + rs.getString(FieldKey.fromString("cloneId")), rs.getString(FieldKey.fromString("cdr3")));
                 }
             });
 
@@ -830,7 +831,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
 
                 for (String cloneId : clnaToCloneMap.get(expData))
                 {
-                    String cdr3 = clnaToCDR3Map.get(expData.toString() + "_" + cloneId);
+                    String cdr3 = clnaToCDR3Map.get(expData + "_" + cloneId);
 
                     File fq1 = new File(fqBase.getParentFile(), basename + "_cln" + cloneId + "_R1.fastq.gz");
                     if (!fq1.exists())

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
@@ -38,14 +38,13 @@
 
 public class CellRangerVDJCellHashingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
 {
-    private FileType _vloupeFileType = new FileType("vloupe", false);
-    private FileType _htmlFileType = new FileType("html", false);
+    private final FileType _vloupeFileType = new FileType("vloupe", false);
+    private final FileType _htmlFileType = new FileType("html", false);
 
     public static final String CATEGORY = "Cell Hashing Calls (VDJ)";
 
     public static final String TARGET_ASSAY = "targetAssay";
     public static final String DELETE_EXISTING_ASSAY_DATA = "deleteExistingAssayData";
-    public static final String ALLOW_GD_RECOVERY = "allowGDRecovery";
     public static final String USE_GEX_BARCODES = "useGexBarcodes";
 
     public CellRangerVDJCellHashingHandler()
@@ -60,15 +59,12 @@ private static List<ToolParameterDescriptor> getDefaultParams()
                 ToolParameterDescriptor.create(DELETE_EXISTING_ASSAY_DATA, "Delete Any Existing Assay Data", "If selected, prior to importing assay data, and existing assay runs in the target container from this readset will be deleted.", "checkbox", new JSONObject(){{
                     put("checked", true);
                 }}, true),
-                ToolParameterDescriptor.create(ALLOW_GD_RECOVERY, "Perform G/D Recovery", "Cellranger marks TRG/TRD rows as non-productive. As a result, gamma/delta cells will tend to be marked non-cell, since the cell lacks productive A/B chain. If selected, the code will ignore the cellranger is_cell flag to recover cells if the row is TRD/TRG, it has a CDR3 and is full-length.", "checkbox", new JSONObject(){{
-                    put("checked", false);
-                }}, false),
                 ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook.  This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
                     put("checked", true);
-                }}, false),
-                ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
-                    put("checked", true);
                 }}, false)
+//                ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
+//                    put("checked", true);
+//                }}, false)
         ));
 
         ret.addAll(CellHashingService.get().getHashingCallingParams(false));
@@ -163,16 +159,10 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputFiles, List<
                     deleteExistingData = ConvertHelper.convert(job.getParameters().get(DELETE_EXISTING_ASSAY_DATA), Boolean.class);
                 }
 
-                boolean allowGDRecovery = false;
-                if (job.getParameters().get(ALLOW_GD_RECOVERY) != null)
-                {
-                    allowGDRecovery = ConvertHelper.convert(job.getParameters().get(ALLOW_GD_RECOVERY), Boolean.class);
-                }
-
                 for (SequenceOutputFile so : inputFiles)
                 {
                     AnalysisModel model = support.getCachedAnalysis(so.getAnalysis_id());
-                    new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData, allowGDRecovery);
+                    new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData);
                 }
             }
         }
@@ -225,8 +215,7 @@ private void processVloupeFile(JobContext ctx, File perCellTsv, Readset rs, Reco
                 parameters.basename = FileUtil.makeLegalName(rs.getName());
                 parameters.allowableHtoBarcodes = htosPerReadset;
 
-                boolean allowGDRecovery = ctx.getParams().optBoolean(ALLOW_GD_RECOVERY, false);
-                parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true, allowGDRecovery);
+                parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true);
                 File existingCountMatrixUmiDir = CellHashingService.get().getExistingFeatureBarcodeCountDir(rs, CellHashingService.BARCODE_TYPE.hashing, ctx.getSequenceSupport());
 
                 File cellToHto = CellHashingService.get().generateHashingCallsForRawMatrix(rs, output, ctx, parameters, existingCountMatrixUmiDir);
@@ -248,52 +237,56 @@ else if (htosPerReadset.size() == 1)
             }
         }
 
-        private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3, boolean allowGDRecovery) throws PipelineJobException
+        private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3) throws PipelineJobException
         {
             //prepare whitelist of cell indexes based on TCR calls:
             File cellBarcodeWhitelist = new File(ctx.getSourceDirectory(), "validCellIndexes.csv");
             Set<String> uniqueBarcodes = new HashSet<>();
             Set<String> uniqueBarcodesIncludingNoCDR3 = new HashSet<>();
             ctx.getLogger().debug("writing cell barcodes, using file: " + perCellTsv.getPath());
             ctx.getLogger().debug("allow cells lacking CDR3: " + allowCellsLackingCDR3);
-            ctx.getLogger().debug("allow gamma/delta recovery: " + allowGDRecovery);
 
             int totalBarcodeWritten = 0;
+            int cellbarcodeIdx = 0;
+            int notCellIdx = 1;
+            int cdr3Idx = -1;
             try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(cellBarcodeWhitelist), ',', CSVWriter.NO_QUOTE_CHARACTER); CSVReader reader = new CSVReader(Readers.getReader(perCellTsv), ','))
             {
                 int rowIdx = 0;
                 int noCallRows = 0;
                 int nonCell = 0;
-                int recoveredGD = 0;
                 String[] row;
                 while ((row = reader.readNext()) != null)
                 {
                     //skip header
                     rowIdx++;
-                    if (rowIdx > 1)
+                    if (rowIdx == 1)
+                    {
+                        List<String> header = Arrays.asList(row);
+                        cdr3Idx = header.indexOf("cdr3");
+                        if (cdr3Idx == -1)
+                        {
+                            throw new PipelineJobException("Unable to find CDR3 field in header: " + perCellTsv.getPath());
+                        }
+                    }
+                    else
                     {
-                        if ("False".equalsIgnoreCase(row[1]))
+                        if ("False".equalsIgnoreCase(row[notCellIdx]))
                         {
-                            if (allowGDRecovery && CellRangerVDJUtils.shouldRecoverGammaDeltaRow(row))
-                            {
-                                recoveredGD++;
-                            }
-                            else
-                            {
-                                nonCell++;
-                                continue;
-                            }
+                            nonCell++;
+                            continue;
                         }
 
                         //NOTE: allow these to pass for cell-hashing under some conditions
-                        boolean hasCDR3 = !"None".equals(row[12]);
+                        String cdr3String = StringUtils.trimToNull(row[cdr3Idx]);
+                        boolean hasCDR3 = cdr3String != null && !"None".equals(cdr3String);
                         if (!hasCDR3)
                         {
                             noCallRows++;
                         }
 
                         //NOTE: 10x appends "-1" to barcodes
-                        String barcode = row[0].split("-")[0];
+                        String barcode = row[cellbarcodeIdx].split("-")[0];
                         if ((allowCellsLackingCDR3 || hasCDR3) && !uniqueBarcodes.contains(barcode))
                         {
                             writer.writeNext(new String[]{barcode});
@@ -308,7 +301,6 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                 ctx.getLogger().debug("rows inspected: " + (rowIdx - 1));
                 ctx.getLogger().debug("rows without CDR3: " + noCallRows);
                 ctx.getLogger().debug("rows not called as cells: " + nonCell);
-                ctx.getLogger().debug("gamma/delta clonotype rows recovered: " + recoveredGD);
                 ctx.getLogger().debug("unique cell barcodes (with CDR3): " + uniqueBarcodes.size());
                 ctx.getLogger().debug("unique cell barcodes (including no CDR3): " + uniqueBarcodesIncludingNoCDR3.size());
                 ctx.getFileManager().addIntermediateFile(cellBarcodeWhitelist);
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;