From db6ad144006f70e47de21eb64fb63bd045954791 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Wed, 22 Dec 2021 18:04:02 -0800
Subject: [PATCH 01/10] Add quicker validation over unknown ADT panels

---
 tcrdb/resources/assay/TCRdb/domains/run.xml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tcrdb/resources/assay/TCRdb/domains/run.xml b/tcrdb/resources/assay/TCRdb/domains/run.xml
index 086ab7c2a..a3820c4ad 100644
--- a/tcrdb/resources/assay/TCRdb/domains/run.xml
+++ b/tcrdb/resources/assay/TCRdb/domains/run.xml
@@ -23,12 +23,6 @@
         <exp:RangeURI>http://www.w3.org/2001/XMLSchema#multiLine</exp:RangeURI>
         <exp:Label>Comments</exp:Label>
     </exp:PropertyDescriptor>
-    <exp:PropertyDescriptor>
-        <exp:Name>assayId</exp:Name>
-        <exp:Required>false</exp:Required>
-        <exp:RangeURI>http://www.w3.org/2001/XMLSchema#int</exp:RangeURI>
-        <exp:Label>Assay Id</exp:Label>
-    </exp:PropertyDescriptor>
     <exp:PropertyDescriptor>
         <exp:Name>performedBy</exp:Name>
         <exp:Required>false</exp:Required>

From d85ddfe807a62b10973b8f858590325878a8e1b9 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Wed, 22 Dec 2021 19:00:13 -0800
Subject: [PATCH 02/10] Allow more streamlined inclusion of g/d sequences in
 cellranger vdj

---
 .../CellRangerVDJCellHashingHandler.java      | 31 +++----------------
 .../tcrdb/pipeline/CellRangerVDJUtils.java    | 18 ++---------
 2 files changed, 8 insertions(+), 41 deletions(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
index 34344733b..c23954c52 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
@@ -45,7 +45,6 @@ public class CellRangerVDJCellHashingHandler extends AbstractParameterizedOutput
 
     public static final String TARGET_ASSAY = "targetAssay";
     public static final String DELETE_EXISTING_ASSAY_DATA = "deleteExistingAssayData";
-    public static final String ALLOW_GD_RECOVERY = "allowGDRecovery";
     public static final String USE_GEX_BARCODES = "useGexBarcodes";
 
     public CellRangerVDJCellHashingHandler()
@@ -60,9 +59,6 @@ private static List<ToolParameterDescriptor> getDefaultParams()
                 ToolParameterDescriptor.create(DELETE_EXISTING_ASSAY_DATA, "Delete Any Existing Assay Data", "If selected, prior to importing assay data, and existing assay runs in the target container from this readset will be deleted.", "checkbox", new JSONObject(){{
                     put("checked", true);
                 }}, true),
-                ToolParameterDescriptor.create(ALLOW_GD_RECOVERY, "Perform G/D Recovery", "Cellranger marks TRG/TRD rows as non-productive. As a result, gamma/delta cells will tend to be marked non-cell, since the cell lacks productive A/B chain. If selected, the code will ignore the cellranger is_cell flag to recover cells if the row is TRD/TRG, it has a CDR3 and is full-length.", "checkbox", new JSONObject(){{
-                    put("checked", false);
-                }}, false),
                 ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook.  This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
                     put("checked", true);
                 }}, false),
@@ -163,16 +159,10 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputFiles, List<
                     deleteExistingData = ConvertHelper.convert(job.getParameters().get(DELETE_EXISTING_ASSAY_DATA), Boolean.class);
                 }
 
-                boolean allowGDRecovery = false;
-                if (job.getParameters().get(ALLOW_GD_RECOVERY) != null)
-                {
-                    allowGDRecovery = ConvertHelper.convert(job.getParameters().get(ALLOW_GD_RECOVERY), Boolean.class);
-                }
-
                 for (SequenceOutputFile so : inputFiles)
                 {
                     AnalysisModel model = support.getCachedAnalysis(so.getAnalysis_id());
-                    new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData, allowGDRecovery);
+                    new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData);
                 }
             }
         }
@@ -225,8 +215,7 @@ private void processVloupeFile(JobContext ctx, File perCellTsv, Readset rs, Reco
                 parameters.basename = FileUtil.makeLegalName(rs.getName());
                 parameters.allowableHtoBarcodes = htosPerReadset;
 
-                boolean allowGDRecovery = ctx.getParams().optBoolean(ALLOW_GD_RECOVERY, false);
-                parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true, allowGDRecovery);
+                parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true);
                 File existingCountMatrixUmiDir = CellHashingService.get().getExistingFeatureBarcodeCountDir(rs, CellHashingService.BARCODE_TYPE.hashing, ctx.getSequenceSupport());
 
                 File cellToHto = CellHashingService.get().generateHashingCallsForRawMatrix(rs, output, ctx, parameters, existingCountMatrixUmiDir);
@@ -248,7 +237,7 @@ else if (htosPerReadset.size() == 1)
             }
         }
 
-        private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3, boolean allowGDRecovery) throws PipelineJobException
+        private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3) throws PipelineJobException
         {
             //prepare whitelist of cell indexes based on TCR calls:
             File cellBarcodeWhitelist = new File(ctx.getSourceDirectory(), "validCellIndexes.csv");
@@ -256,7 +245,6 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
             Set<String> uniqueBarcodesIncludingNoCDR3 = new HashSet<>();
             ctx.getLogger().debug("writing cell barcodes, using file: " + perCellTsv.getPath());
             ctx.getLogger().debug("allow cells lacking CDR3: " + allowCellsLackingCDR3);
-            ctx.getLogger().debug("allow gamma/delta recovery: " + allowGDRecovery);
 
             int totalBarcodeWritten = 0;
             try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(cellBarcodeWhitelist), ',', CSVWriter.NO_QUOTE_CHARACTER); CSVReader reader = new CSVReader(Readers.getReader(perCellTsv), ','))
@@ -264,7 +252,6 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                 int rowIdx = 0;
                 int noCallRows = 0;
                 int nonCell = 0;
-                int recoveredGD = 0;
                 String[] row;
                 while ((row = reader.readNext()) != null)
                 {
@@ -274,15 +261,8 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                     {
                         if ("False".equalsIgnoreCase(row[1]))
                         {
-                            if (allowGDRecovery && CellRangerVDJUtils.shouldRecoverGammaDeltaRow(row))
-                            {
-                                recoveredGD++;
-                            }
-                            else
-                            {
-                                nonCell++;
-                                continue;
-                            }
+                            nonCell++;
+                            continue;
                         }
 
                         //NOTE: allow these to pass for cell-hashing under some conditions
@@ -308,7 +288,6 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                 ctx.getLogger().debug("rows inspected: " + (rowIdx - 1));
                 ctx.getLogger().debug("rows without CDR3: " + noCallRows);
                 ctx.getLogger().debug("rows not called as cells: " + nonCell);
-                ctx.getLogger().debug("gamma/delta clonotype rows recovered: " + recoveredGD);
                 ctx.getLogger().debug("unique cell barcodes (with CDR3): " + uniqueBarcodes.size());
                 ctx.getLogger().debug("unique cell barcodes (including no CDR3): " + uniqueBarcodesIncludingNoCDR3.size());
                 ctx.getFileManager().addIntermediateFile(cellBarcodeWhitelist);
diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
index 3c156f26f..5095c9c14 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
@@ -65,7 +65,7 @@ public CellRangerVDJUtils(Logger log)
         _log = log;
     }
 
-    public void importAssayData(PipelineJob job, AnalysisModel model, File vLoupeFile, File outDir, Integer assayId, @Nullable Integer runId, boolean deleteExisting, boolean allowGDRecovery) throws PipelineJobException
+    public void importAssayData(PipelineJob job, AnalysisModel model, File vLoupeFile, File outDir, Integer assayId, @Nullable Integer runId, boolean deleteExisting) throws PipelineJobException
     {
         File cellRangerOutDir = vLoupeFile.getParentFile();
 
@@ -107,7 +107,6 @@ public void importAssayData(PipelineJob job, AnalysisModel model, File vLoupeFil
         }
 
         _log.info("loading results into assay: " + assayId);
-        _log.info("allow gamma/delta recovery: " + allowGDRecovery);
 
         if (runId == null)
         {
@@ -278,7 +277,6 @@ else if ("Negative".equals(hto))
             int noCDR3 = 0;
             int noCGene = 0;
             int notFullLength = 0;
-            int recoveredGD = 0;
             int nonCell = 0;
             int totalSkipped = 0;
             int doubletSkipped = 0;
@@ -297,17 +295,8 @@ else if ("Negative".equals(hto))
 
                 if ("False".equalsIgnoreCase(line[1]))
                 {
-                    //NOTE: cellranger marks TRG/TRD rows as non-productive. therefore, gamma/delta cells will tend to be marked non-cell, since the cell lacks productive A/B
-                    // Allow recovery of these cells if the row is TRD/TRG, has a CDR3 and is full-length:
-                    if (allowGDRecovery && shouldRecoverGammaDeltaRow(line))
-                    {
-                        recoveredGD++;
-                    }
-                    else
-                    {
-                        nonCell++;
-                        continue;
-                    }
+                    nonCell++;
+                    continue;
                 }
 
                 if ("None".equals(line[12]))
@@ -409,7 +398,6 @@ else if (discordantBarcodes.contains(barcode))
             _log.info("total clonotype rows without CDR3: " + noCDR3);
             _log.info("total clonotype rows discarded for no C-gene: " + noCGene);
             _log.info("total clonotype rows discarded for not full length: " + notFullLength);
-            _log.info("total gamma/delta clonotype rows recovered: " + recoveredGD);
             _log.info("total clonotype rows skipped for unknown barcodes: " + totalSkipped + " (" + (NumberFormat.getPercentInstance().format(totalSkipped / (double)totalCells)) + ")");
             _log.info("total clonotype rows skipped because they are doublets: " + doubletSkipped + " (" + (NumberFormat.getPercentInstance().format(doubletSkipped / (double)totalCells)) + ")");
             _log.info("total clonotype rows skipped because they are discordant calls: " + discordantSkipped + " (" + (NumberFormat.getPercentInstance().format(discordantSkipped / (double)totalCells)) + ")");

From 16fae2e293b88d39c9f6aed6b781790b7db872a5 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Thu, 23 Dec 2021 11:58:20 -0800
Subject: [PATCH 03/10] Add files to .gitignore

---
 .gitignore | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.gitignore b/.gitignore
index 144b1d188..caf64a360 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,17 @@
 /mcc/resources/views/gen
 /mcc/resources/web/mcc/gen
 
+elispot_assay/resources/credits/dependencies.txt
+elispot_assay/resources/credits/jars.txt
+
+mcc/resources/credits/dependencies.txt
+mcc/resources/credits/jars.txt
+
+mGAP/resources/credits/dependencies.txt
+mGAP/resources/credits/jars.txt
+
+tcrdb/resources/credits/dependencies.txt
+tcrdb/resources/credits/jars.txt
+
 variantdb/resources/credits/dependencies.txt
 variantdb/resources/credits/jars.txt

From 05365f2a90ebb316b3664246db9c4931c4ad3eb5 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 26 Dec 2021 06:11:37 -0800
Subject: [PATCH 04/10] Update CellRanger VDJ parsing to support versions 3 and
 6

---
 .../tcrdb/pipeline/CellRangerVDJUtils.java    | 109 ++++++++++++------
 1 file changed, 73 insertions(+), 36 deletions(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
index 5095c9c14..416504373 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
@@ -269,7 +269,11 @@ else if ("Negative".equals(hto))
 
         // use unfiltered data so we can apply demultiplex and also apply alternate filtering logic.
         // also, 10x no longer reports TRD/TRG data in their consensus file
-        //header: barcode	is_cell	contig_id	high_confidence	length	chain	v_gene	d_gene	j_gene	c_gene	full_length	productive	cdr3	cdr3_nt	reads	umis	raw_clonotype_id	raw_consensus_id
+        //header for 3.x and lower:
+        // barcode	is_cell	contig_id	high_confidence	length	chain	v_gene	d_gene	j_gene	c_gene	full_length	productive	cdr3	cdr3_nt	reads	umis	raw_clonotype_id	raw_consensus_id
+
+        //header for 6.x:
+        // barcode	is_cell	contig_id	high_confidence	length	chain	v_gene	d_gene	j_gene	c_gene	full_length	productive	fwr1	fwr1_nt	cdr1	cdr1_nt	fwr2	fwr2_nt	cdr2	cdr2_nt	fwr3	fwr3_nt	cdr3	cdr3_nt	fwr4	fwr4_nt	reads	umis	raw_clonotype_id	raw_consensus_id	exact_subclonotype_id
         try (CSVReader reader = new CSVReader(Readers.getReader(allCsv), ','))
         {
             String[] line;
@@ -284,46 +288,49 @@ else if ("Negative".equals(hto))
             int hasCDR3NoClonotype = 0;
             int multiChainConverted = 0;
             Set<String> knownBarcodes = new HashSet<>();
+
+            Map<HEADER_FIELD, Integer> headerToIdx = null;
             while ((line = reader.readNext()) != null)
             {
                 idx++;
                 if (idx == 1)
                 {
                     _log.debug("skipping header, length: " + line.length);
+                    headerToIdx = inferFieldIdx(line);
                     continue;
                 }
 
-                if ("False".equalsIgnoreCase(line[1]))
+                if ("False".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.IS_CELL)]))
                 {
                     nonCell++;
                     continue;
                 }
 
-                if ("None".equals(line[12]))
+                if ("None".equals(line[headerToIdx.get(HEADER_FIELD.CDR3)]))
                 {
                     noCDR3++;
                     continue;
                 }
 
-                String cGene = removeNone(line[9]);
+                String cGene = removeNone(line[headerToIdx.get(HEADER_FIELD.C_GENE)]);
                 if (cGene == null)
                 {
                     // Only discard these if chain type doesnt match between JGene and VGene.
-                    if (!line[8].substring(0, 3).equals(line[6].substring(0,3)))
+                    if (!line[headerToIdx.get(HEADER_FIELD.J_GENE)].substring(0, 3).equals(line[headerToIdx.get(HEADER_FIELD.V_GENE)].substring(0,3)))
                     {
                         noCGene++;
                         continue;
                     }
                 }
 
-                if ("False".equals(line[10]))
+                if ("False".equals(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
                 {
                     notFullLength++;
                     continue;
                 }
 
                 //NOTE: 10x appends "-1" to barcode sequences
-                String barcode = line[0].split("-")[0];
+                String barcode = line[headerToIdx.get(HEADER_FIELD.CELL_BARCODE)].split("-")[0];
                 Integer cDNA = useCellHashing ? cellBarcodeToCDNAMap.get(barcode) : defaultCDNA;
                 if (cDNA == null)
                 {
@@ -344,9 +351,9 @@ else if (discordantBarcodes.contains(barcode))
                 }
                 knownBarcodes.add(barcode);
 
-                String clonotypeId = removeNone(line[16]);
-                String cdr3 = removeNone(line[12]);
-                if (clonotypeId == null && cdr3 != null && "TRUE".equalsIgnoreCase(line[10]))
+                String clonotypeId = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CLONOTYPE_ID)]);
+                String cdr3 = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3)]);
+                if (clonotypeId == null && cdr3 != null && "TRUE".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
                 {
                     hasCDR3NoClonotype++;
                 }
@@ -357,13 +364,13 @@ else if (discordantBarcodes.contains(barcode))
                 }
 
                 //Preferentially use raw_consensus_id, but fall back to contig_id
-                String sequenceContigName = removeNone(line[17]) == null ? removeNone(line[2]) : removeNone(line[17]);
+                String sequenceContigName = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]) == null ? removeNone(line[headerToIdx.get(HEADER_FIELD.CONTIG_ID)]) : removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]);
 
                 //NOTE: chimeras with a TRDV / TRAJ / TRAC are relatively common. categorize as TRA for reporting ease
-                String locus = line[5];
-                if (locus.equals("Multi") && cGene != null && removeNone(line[8]) != null && removeNone(line[6]) != null)
+                String locus = line[headerToIdx.get(HEADER_FIELD.CHAIN)];
+                if (locus.equals("Multi") && cGene != null && removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]) != null && removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]) != null)
                 {
-                    if (cGene.contains("TRAC") && removeNone(line[8]).contains("TRAJ") && removeNone(line[6]).contains("TRDV"))
+                    if (cGene.contains("TRAC") && removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]).contains("TRAJ") && removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]).contains("TRDV"))
                     {
                         locus = "TRA";
                         multiChainConverted++;
@@ -371,11 +378,22 @@ else if (discordantBarcodes.contains(barcode))
                 }
 
                 // Aggregate by: cDNA_ID, cdr3, chain, raw_clonotype_id, sequenceContigName, vHit, dHit, jHit, cHit, cdr3_nt
-                String key = StringUtils.join(new String[]{cDNA.toString(), line[12], locus, clonotypeId, sequenceContigName, removeNone(line[6]), removeNone(line[7]), removeNone(line[8]), cGene, removeNone(line[13])}, "<>");
+                String key = StringUtils.join(new String[]{cDNA.toString(), line[headerToIdx.get(HEADER_FIELD.CDR3)], locus, clonotypeId, sequenceContigName, removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.D_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]), cGene, removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3_NT)])}, "<>");
                 AssayModel am;
                 if (!rows.containsKey(key))
                 {
-                    am = createForRow(line, sequenceContigName, cDNA, clonotypeId, locus);
+                    am = new AssayModel();
+                    am.cdna = cDNA;
+                    am.cdr3 = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3)]);
+                    am.locus = locus;
+                    am.cloneId = clonotypeId;
+                    am.sequenceContigName = sequenceContigName;
+
+                    am.vHit = removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]);
+                    am.dHit = removeNone(line[headerToIdx.get(HEADER_FIELD.D_GENE)]);
+                    am.jHit = removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]);
+                    am.cHit = removeNone(line[headerToIdx.get(HEADER_FIELD.C_GENE)]);
+                    am.cdr3Nt = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3_NT)]);
                 }
                 else
                 {
@@ -459,24 +477,6 @@ else if (discordantBarcodes.contains(barcode))
         saveRun(job, protocol, model, assayRows, outDir, runId, deleteExisting);
     }
 
-    private AssayModel createForRow(String[] line, String sequenceContigName, Integer cDNA, String clonotypeId, String locus)
-    {
-        AssayModel am = new AssayModel();
-        am.cdna = cDNA;
-        am.cdr3 = removeNone(line[12]);
-        am.locus = locus;
-        am.cloneId = clonotypeId;
-        am.sequenceContigName = sequenceContigName;
-
-        am.vHit = removeNone(line[6]);
-        am.dHit = removeNone(line[7]);
-        am.jHit = removeNone(line[8]);
-        am.cHit = removeNone(line[9]);
-        am.cdr3Nt = removeNone(line[13]);
-
-        return am;
-    }
-
     private File getCellToHtoFile(ExpRun run) throws PipelineJobException
     {
         List<? extends ExpData> datas = run.getInputDatas(TCR_HASHING_CALLS, ExpProtocol.ApplicationType.ExperimentRunOutput);
@@ -671,8 +671,45 @@ public static File getPerCellCsv(File cellRangerOutDir)
         return new File(cellRangerOutDir, "all_contig_annotations.csv");
     }
 
-    public static boolean shouldRecoverGammaDeltaRow(String[] line)
+    private Map<HEADER_FIELD, Integer> inferFieldIdx(String[] line)
     {
-        return ("TRD".equals(line[5]) || "TRG".equals(line[5])) && !"None".equals(line[12]) && !"False".equals(line[10]);
+        Map<HEADER_FIELD, Integer> ret = new HashMap<>();
+        List<String> header = Arrays.asList(line);
+        for (HEADER_FIELD hf : HEADER_FIELD.values())
+        {
+            int idx = header.indexOf(hf.headerStr);
+            if (idx == -1)
+            {
+                throw new IllegalStateException("Missing expected header field: " + hf.headerStr);
+            }
+
+            ret.put(hf, idx);
+        }
+
+        return ret;
+    }
+
+    private enum HEADER_FIELD
+    {
+        CELL_BARCODE("barcode"),
+        IS_CELL("is_cell"),
+        CONTIG_ID("contig_id"),
+        CHAIN("chain"),
+        V_GENE("v_gene"),
+        D_GENE("d_gene"),
+        J_GENE("j_gene"),
+        C_GENE("c_gene"),
+        FULL_LENGTH("full_length"),
+        CDR3("cdr3"),
+        CDR3_NT("cdr3_nt"),
+        RAW_CLONOTYPE_ID("raw_clonotype_id"),
+        RAW_CONSENSUS_ID("raw_consensus_id");
+
+        String headerStr;
+
+        HEADER_FIELD(String headerStr)
+        {
+            this.headerStr = headerStr;
+        }
     }
 }

From 6b56a0a0b689209523fe075ffaa0cad806f281fa Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 26 Dec 2021 06:16:57 -0800
Subject: [PATCH 05/10] Also update cell hashing code to  support cellranger
 version 6

---
 .../CellRangerVDJCellHashingHandler.java      | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
index c23954c52..fce343028 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
@@ -38,8 +38,8 @@
 
 public class CellRangerVDJCellHashingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
 {
-    private FileType _vloupeFileType = new FileType("vloupe", false);
-    private FileType _htmlFileType = new FileType("html", false);
+    private final FileType _vloupeFileType = new FileType("vloupe", false);
+    private final FileType _htmlFileType = new FileType("html", false);
 
     public static final String CATEGORY = "Cell Hashing Calls (VDJ)";
 
@@ -61,10 +61,10 @@ private static List<ToolParameterDescriptor> getDefaultParams()
                 }}, true),
                 ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook.  This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
                     put("checked", true);
-                }}, false),
-                ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
-                    put("checked", true);
                 }}, false)
+//                ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
+//                    put("checked", true);
+//                }}, false)
         ));
 
         ret.addAll(CellHashingService.get().getHashingCallingParams(false));
@@ -247,6 +247,9 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
             ctx.getLogger().debug("allow cells lacking CDR3: " + allowCellsLackingCDR3);
 
             int totalBarcodeWritten = 0;
+            int cellbarcodeIdx = 0;
+            int notCellIdx = 1;
+            int cdr3Idx = -1;
             try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(cellBarcodeWhitelist), ',', CSVWriter.NO_QUOTE_CHARACTER); CSVReader reader = new CSVReader(Readers.getReader(perCellTsv), ','))
             {
                 int rowIdx = 0;
@@ -257,23 +260,32 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                 {
                     //skip header
                     rowIdx++;
-                    if (rowIdx > 1)
+                    if (rowIdx == 1)
+                    {
+                        List<String> header = Arrays.asList(row);
+                        cdr3Idx = header.indexOf("cdr3");
+                        if (cdr3Idx == -1)
+                        {
+                            throw new PipelineJobException("Unable to find CDR3 field in header: " + perCellTsv.getPath());
+                        }
+                    }
+                    else
                     {
-                        if ("False".equalsIgnoreCase(row[1]))
+                        if ("False".equalsIgnoreCase(row[notCellIdx]))
                         {
                             nonCell++;
                             continue;
                         }
 
                         //NOTE: allow these to pass for cell-hashing under some conditions
-                        boolean hasCDR3 = !"None".equals(row[12]);
+                        boolean hasCDR3 = !"None".equals(row[cdr3Idx]);
                         if (!hasCDR3)
                         {
                             noCallRows++;
                         }
 
                         //NOTE: 10x appends "-1" to barcodes
-                        String barcode = row[0].split("-")[0];
+                        String barcode = row[cellbarcodeIdx].split("-")[0];
                         if ((allowCellsLackingCDR3 || hasCDR3) && !uniqueBarcodes.contains(barcode))
                         {
                             writer.writeNext(new String[]{barcode});

From 678b5161afa08e5721e48154337ced97219ae325 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 26 Dec 2021 06:18:42 -0800
Subject: [PATCH 06/10] Retain sites-only VCF for mGAP releases

---
 mGAP/resources/etls/prime-seq.xml             |  3 +++
 .../dbscripts/postgresql/mgap-16.59-16.60.sql |  1 +
 .../dbscripts/sqlserver/mgap-16.59-16.60.sql  |  1 +
 mGAP/resources/schemas/mgap.xml               |  9 ++++++++
 mGAP/src/org/labkey/mgap/mGAPModule.java      |  2 +-
 .../mgap/pipeline/mGapReleaseGenerator.java   | 21 +++++++++++++++++--
 6 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
 create mode 100644 mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql

diff --git a/mGAP/resources/etls/prime-seq.xml b/mGAP/resources/etls/prime-seq.xml
index ceabaa9b3..fd92b6c28 100644
--- a/mGAP/resources/etls/prime-seq.xml
+++ b/mGAP/resources/etls/prime-seq.xml
@@ -91,6 +91,8 @@
                     <column>liftedVcfId/dataid/DataFileUrl</column>
                     <column>liftedVcfId/name</column>
                     <column>liftedVcfId/library_id/name</column>
+                    <column>sitesOnlyVcfId/dataid/DataFileUrl</column>
+                    <column>sitesOnlyVcfId/name</column>
                     <column>humanJbrowseId</column>
                     <column>objectId</column>
                 </sourceColumns>
@@ -99,6 +101,7 @@
                 <columnTransforms>
                     <column source="genomeId/name" target="genomeId" transformClass="org.labkey.mgap.columnTransforms.GenomeTransform" />
                     <column source="vcfId/dataid/DataFileUrl" target="vcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
+                    <column source="sitesOnlyVcfId/dataid/DataFileUrl" target="sitesOnlyVcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
                     <column source="jbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseSessionTransform"/>
                     <column source="liftedVcfId/dataid/DataFileUrl" target="liftedVcfId" transformClass="org.labkey.mgap.columnTransforms.LiftedVcfTransform" />
                     <column source="humanJbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseHumanSessionTransform"/>
diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
new file mode 100644
index 000000000..b7f281405
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
@@ -0,0 +1 @@
+ALTER TABLE mGAP.variantReleases ADD sitesOnlyVcfId int;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
new file mode 100644
index 000000000..b7f281405
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
@@ -0,0 +1 @@
+ALTER TABLE mGAP.variantReleases ADD sitesOnlyVcfId int;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index 7241248de..96b65a274 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -149,6 +149,15 @@
                     <fkColumnName>rowid</fkColumnName>
                 </fk>
             </column>
+            <column columnName="sitesOnlyVcfId">
+                <columnTitle>Sites-Only VCF File</columnTitle>
+                <nullable>true</nullable>
+                <fk>
+                    <fkDbSchema>sequenceanalysis</fkDbSchema>
+                    <fkTable>outputfiles</fkTable>
+                    <fkColumnName>rowid</fkColumnName>
+                </fk>
+            </column>
             <column columnName="humanJbrowseId">
                 <columnTitle>Genome Browser (Human)</columnTitle>
                 <nullable>true</nullable>
diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java
index b10b97131..f5cda0f3b 100644
--- a/mGAP/src/org/labkey/mgap/mGAPModule.java
+++ b/mGAP/src/org/labkey/mgap/mGAPModule.java
@@ -59,7 +59,7 @@ public String getName()
     @Override
     public Double getSchemaVersion()
     {
-        return 16.59;
+        return 16.60;
     }
 
     @Override
diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
index fb643b50b..cd5d2b08c 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
@@ -266,6 +266,7 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<Sequ
             Map<String, SequenceOutputFile> outputVCFMap = new HashMap<>();
             Map<String, SequenceOutputFile> outputTableMap = new HashMap<>();
             Map<String, SequenceOutputFile> liftedVcfMap = new HashMap<>();
+            Map<String, SequenceOutputFile> sitesOnlyVcfMap = new HashMap<>();
             Map<String, SequenceOutputFile> trackVCFMap = new HashMap<>();
 
             for (SequenceOutputFile so : outputsCreated)
@@ -285,6 +286,11 @@ else if (so.getCategory().contains("Lifted"))
                     String name = so.getName().replaceAll(" Lifted to Human", "");
                     liftedVcfMap.put(name, so);
                 }
+                else if (so.getCategory().contains("mGAP Release: Sites Only"))
+                {
+                    String name = so.getName().replaceAll(": Sites Only", "");
+                    sitesOnlyVcfMap.put(name, so);
+                }
                 else if (so.getCategory().endsWith("Release"))
                 {
                     outputVCFMap.put(so.getName(), so);
@@ -328,6 +334,12 @@ else if (so.getCategory().endsWith("Release Track"))
                     throw new PipelineJobException("Unable to find lifted VCF for release: " + release);
                 }
 
+                SequenceOutputFile sitesOnlyVcf = sitesOnlyVcfMap.get(release);
+                if (sitesOnlyVcf == null)
+                {
+                    throw new PipelineJobException("Unable to find sites-only VCF for release: " + release);
+                }
+
                 //find basic stats:
                 job.getLogger().info("inspecting file: " + so.getName());
                 int totalSubjects;
@@ -391,6 +403,7 @@ else if (so.getCategory().endsWith("Release Track"))
                 row.put("releaseDate", new Date());
                 row.put("vcfId", so.getRowid());
                 row.put("liftedVcfId", liftedVcf.getRowid());
+                row.put("sitesOnlyVcfId", sitesOnlyVcf.getRowid());
                 row.put("variantTable", so2.getRowid());
                 row.put("genomeId", so.getLibrary_id());
                 row.put("totalSubjects", totalSubjects);
@@ -908,8 +921,12 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, ReferenceGenome s
                 wrapper.execute(sourceGenome.getWorkingFastaFile(), primaryTrackVcf, noGenotypes, Arrays.asList("--sites-only-vcf-output"));
             }
 
-            ctx.getFileManager().addIntermediateFile(noGenotypes);
-            ctx.getFileManager().addIntermediateFile(new File(noGenotypes.getPath() + ".tbi"));
+            SequenceOutputFile output = new SequenceOutputFile();
+            output.setFile(noGenotypes);
+            output.setName(primaryTrackVcf.getName() + ": Sites Only");
+            output.setCategory("mGAP Release: Sites Only");
+            output.setLibrary_id(sourceGenome.getGenomeId());
+            ctx.getFileManager().addSequenceOutput(output);
 
             //lift to target genome
             Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class);

From 8f8ff8528d676af18c46d7695a45cd6da4860100 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 26 Dec 2021 07:01:49 -0800
Subject: [PATCH 07/10] Bugfixes related to TCRg/d parsing

---
 tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
index 416504373..d38c4e6a5 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
@@ -560,7 +560,7 @@ private Map<String, Object> processRow(AssayModel assayModel, AnalysisModel mode
 
     private String removeNone(String input)
     {
-        return "None".equals(input) ? null : input;
+        return "None".equals(input) ? null : StringUtils.trimToNull(input);
     }
 
     private void saveRun(PipelineJob job, ExpProtocol protocol, AnalysisModel model, List<Map<String, Object>> rows, File outDir, Integer runId, boolean deleteExisting) throws PipelineJobException

From 513fd68205f0572911728b588bd6db3fb176ae5c Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 26 Dec 2021 07:04:14 -0800
Subject: [PATCH 08/10] Fix SQL error

---
 .../resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql | 2 +-
 mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
index b7f281405..4ec59f370 100644
--- a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
+++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.59-16.60.sql
@@ -1 +1 @@
-ALTER TABLE mGAP.variantReleases ADD sitesOnlyVcfId int;
\ No newline at end of file
+ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
index b7f281405..4ec59f370 100644
--- a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
+++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.59-16.60.sql
@@ -1 +1 @@
-ALTER TABLE mGAP.variantReleases ADD sitesOnlyVcfId int;
\ No newline at end of file
+ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;
\ No newline at end of file

From 8595e6c562fde629f3cd46cf51b17aa1aa84eb70 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Mon, 27 Dec 2021 10:48:14 -0800
Subject: [PATCH 09/10] Dont allow TCR rows lacking c-gene and either v or j

---
 .../labkey/tcrdb/pipeline/CellRangerVDJUtils.java    | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
index d38c4e6a5..9367c7b16 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
@@ -306,7 +306,8 @@ else if ("Negative".equals(hto))
                     continue;
                 }
 
-                if ("None".equals(line[headerToIdx.get(HEADER_FIELD.CDR3)]))
+                String cdr3 = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3)]);
+                if (cdr3 == null)
                 {
                     noCDR3++;
                     continue;
@@ -316,14 +317,16 @@ else if ("Negative".equals(hto))
                 if (cGene == null)
                 {
                     // Only discard these if chain type doesnt match between JGene and VGene.
-                    if (!line[headerToIdx.get(HEADER_FIELD.J_GENE)].substring(0, 3).equals(line[headerToIdx.get(HEADER_FIELD.V_GENE)].substring(0,3)))
+                    String vGene = removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]);
+                    String jGene = removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]);
+                    if (vGene == null || jGene == null || !jGene.substring(0, 3).equals(vGene.substring(0,3)))
                     {
                         noCGene++;
                         continue;
                     }
                 }
 
-                if ("False".equals(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
+                if ("False".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
                 {
                     notFullLength++;
                     continue;
@@ -352,8 +355,7 @@ else if (discordantBarcodes.contains(barcode))
                 knownBarcodes.add(barcode);
 
                 String clonotypeId = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CLONOTYPE_ID)]);
-                String cdr3 = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3)]);
-                if (clonotypeId == null && cdr3 != null && "TRUE".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
+                if (clonotypeId == null && "TRUE".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
                 {
                     hasCDR3NoClonotype++;
                 }

From c92e4b93f4d1f2983c68e719ec71e8fc1f5023bd Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 28 Dec 2021 06:58:08 -0800
Subject: [PATCH 10/10] Allow cellranger rows lacking a consensus clonotype
 call

---
 .../src/org/labkey/tcrdb/TCRdbController.java |  5 +-
 .../CellRangerVDJCellHashingHandler.java      |  3 +-
 .../tcrdb/pipeline/CellRangerVDJUtils.java    | 53 +++++++++++--------
 3 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/tcrdb/src/org/labkey/tcrdb/TCRdbController.java b/tcrdb/src/org/labkey/tcrdb/TCRdbController.java
index 7647b1d63..bdad868b1 100644
--- a/tcrdb/src/org/labkey/tcrdb/TCRdbController.java
+++ b/tcrdb/src/org/labkey/tcrdb/TCRdbController.java
@@ -706,6 +706,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
                     imputedSequences.append(rs.getString(FieldKey.fromString("sequence"))).append("\n");
                 }
 
+                // This applies to MiXCR
                 if (rs.getObject(FieldKey.fromString("cloneId")) != null && rs.getObject(FieldKey.fromString("clonesFile")) != null)
                 {
                     Integer key = rs.getInt(FieldKey.fromString("clonesFile"));
@@ -714,7 +715,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
 
                     clnaToCloneMap.put(key, set);
 
-                    clnaToCDR3Map.put(key.toString() + "_" + rs.getString(FieldKey.fromString("cloneId")), rs.getString(FieldKey.fromString("cdr3")));
+                    clnaToCDR3Map.put(key + "_" + rs.getString(FieldKey.fromString("cloneId")), rs.getString(FieldKey.fromString("cdr3")));
                 }
             });
 
@@ -830,7 +831,7 @@ public void export(DownloadCloneMaterialsForm form, HttpServletResponse response
 
                 for (String cloneId : clnaToCloneMap.get(expData))
                 {
-                    String cdr3 = clnaToCDR3Map.get(expData.toString() + "_" + cloneId);
+                    String cdr3 = clnaToCDR3Map.get(expData + "_" + cloneId);
 
                     File fq1 = new File(fqBase.getParentFile(), basename + "_cln" + cloneId + "_R1.fastq.gz");
                     if (!fq1.exists())
diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
index fce343028..caf72c5c4 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java
@@ -278,7 +278,8 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
                         }
 
                         //NOTE: allow these to pass for cell-hashing under some conditions
-                        boolean hasCDR3 = !"None".equals(row[cdr3Idx]);
+                        String cdr3String = StringUtils.trimToNull(row[cdr3Idx]);
+                        boolean hasCDR3 = cdr3String != null && !"None".equals(cdr3String);
                         if (!hasCDR3)
                         {
                             noCallRows++;
diff --git a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
index 9367c7b16..38d434c87 100644
--- a/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
+++ b/tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJUtils.java
@@ -285,7 +285,8 @@ else if ("Negative".equals(hto))
             int totalSkipped = 0;
             int doubletSkipped = 0;
             int discordantSkipped = 0;
-            int hasCDR3NoClonotype = 0;
+            int noConsensusClonotype = 0;
+            int fullLengthNoClonotype = 0;
             int multiChainConverted = 0;
             Set<String> knownBarcodes = new HashSet<>();
 
@@ -354,33 +355,42 @@ else if (discordantBarcodes.contains(barcode))
                 }
                 knownBarcodes.add(barcode);
 
-                String clonotypeId = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CLONOTYPE_ID)]);
-                if (clonotypeId == null && "TRUE".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
+                String rawClonotypeId = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CLONOTYPE_ID)]);
+                if (rawClonotypeId == null && "TRUE".equalsIgnoreCase(line[headerToIdx.get(HEADER_FIELD.FULL_LENGTH)]))
                 {
-                    hasCDR3NoClonotype++;
+                    fullLengthNoClonotype++;
                 }
 
-                if (clonotypeId == null)
+                if (rawClonotypeId == null)
                 {
-                    continue;
+                    noConsensusClonotype++;
+                    // NOTE: allow rows without consensus clonotypes, which will include the g/d rows:
+                    //continue;
                 }
 
                 //Preferentially use raw_consensus_id, but fall back to contig_id
-                String sequenceContigName = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]) == null ? removeNone(line[headerToIdx.get(HEADER_FIELD.CONTIG_ID)]) : removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]);
+                String coalescedContigName = removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]) == null ? removeNone(line[headerToIdx.get(HEADER_FIELD.CONTIG_ID)]) : removeNone(line[headerToIdx.get(HEADER_FIELD.RAW_CONSENSUS_ID)]);
 
                 //NOTE: chimeras with a TRDV / TRAJ / TRAC are relatively common. categorize as TRA for reporting ease
                 String locus = line[headerToIdx.get(HEADER_FIELD.CHAIN)];
-                if (locus.equals("Multi") && cGene != null && removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]) != null && removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]) != null)
+                String jGene = removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]);
+                String vGene = removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]);
+                if (locus.equals("Multi") && cGene != null && jGene != null && vGene != null)
                 {
-                    if (cGene.contains("TRAC") && removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]).contains("TRAJ") && removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]).contains("TRDV"))
+                    if (cGene.contains("TRAC") && jGene.contains("TRAJ") && vGene.contains("TRDV"))
                     {
                         locus = "TRA";
                         multiChainConverted++;
                     }
                 }
 
-                // Aggregate by: cDNA_ID, cdr3, chain, raw_clonotype_id, sequenceContigName, vHit, dHit, jHit, cHit, cdr3_nt
-                String key = StringUtils.join(new String[]{cDNA.toString(), line[headerToIdx.get(HEADER_FIELD.CDR3)], locus, clonotypeId, sequenceContigName, removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.D_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]), cGene, removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3_NT)])}, "<>");
+                if (cGene != null && !cGene.startsWith(locus))
+                {
+                    _log.error("Discordant locus/cGene: " + locus + " / " + cGene);
+                }
+
+                // Aggregate by: cDNA_ID, cdr3, chain, raw_clonotype_id, coalescedContigName, vHit, dHit, jHit, cHit, cdr3_nt
+                String key = StringUtils.join(new String[]{cDNA.toString(), line[headerToIdx.get(HEADER_FIELD.CDR3)], locus, rawClonotypeId, coalescedContigName, removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.D_GENE)]), removeNone(line[headerToIdx.get(HEADER_FIELD.J_GENE)]), cGene, removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3_NT)])}, "<>");
                 AssayModel am;
                 if (!rows.containsKey(key))
                 {
@@ -388,8 +398,8 @@ else if (discordantBarcodes.contains(barcode))
                     am.cdna = cDNA;
                     am.cdr3 = removeNone(line[headerToIdx.get(HEADER_FIELD.CDR3)]);
                     am.locus = locus;
-                    am.cloneId = clonotypeId;
-                    am.sequenceContigName = sequenceContigName;
+                    am.cloneId = rawClonotypeId == null ? coalescedContigName : rawClonotypeId;
+                    am.coalescedContigName = coalescedContigName;
 
                     am.vHit = removeNone(line[headerToIdx.get(HEADER_FIELD.V_GENE)]);
                     am.dHit = removeNone(line[headerToIdx.get(HEADER_FIELD.D_GENE)]);
@@ -402,7 +412,7 @@ else if (discordantBarcodes.contains(barcode))
                     am = rows.get(key);
                 }
 
-                uniqueContigNames.add(am.sequenceContigName);
+                uniqueContigNames.add(am.coalescedContigName);
                 am.barcodes.add(barcode);
                 rows.put(key, am);
 
@@ -418,13 +428,14 @@ else if (discordantBarcodes.contains(barcode))
             _log.info("total clonotype rows without CDR3: " + noCDR3);
             _log.info("total clonotype rows discarded for no C-gene: " + noCGene);
             _log.info("total clonotype rows discarded for not full length: " + notFullLength);
-            _log.info("total clonotype rows skipped for unknown barcodes: " + totalSkipped + " (" + (NumberFormat.getPercentInstance().format(totalSkipped / (double)totalCells)) + ")");
+            _log.info("total clonotype rows discarded for lacking consensus clonotype: " + noConsensusClonotype);
+            _log.info("total clonotype rows skipped for unknown barcocdes: " + totalSkipped + " (" + (NumberFormat.getPercentInstance().format(totalSkipped / (double)totalCells)) + ")");
             _log.info("total clonotype rows skipped because they are doublets: " + doubletSkipped + " (" + (NumberFormat.getPercentInstance().format(doubletSkipped / (double)totalCells)) + ")");
             _log.info("total clonotype rows skipped because they are discordant calls: " + discordantSkipped + " (" + (NumberFormat.getPercentInstance().format(discordantSkipped / (double)totalCells)) + ")");
             _log.info("unique known cell barcodes: " + knownBarcodes.size());
             _log.info("total clonotypes: " + rows.size());
             _log.info("total sequences: " + uniqueContigNames.size());
-            _log.info("total cells with CDR3, lacking clonotype: " + hasCDR3NoClonotype);
+            _log.info("total rows lacking clonotype, but marked full-length: " + fullLengthNoClonotype);
             _log.info("total rows converted from Multi to TRA: " + multiChainConverted);
 
         }
@@ -514,7 +525,7 @@ private static class AssayModel
         private int cdna;
 
         private Set<String> barcodes = new HashSet<>();
-        private String sequenceContigName;
+        private String coalescedContigName;
     }
 
     private Map<String, Object> processRow(AssayModel assayModel, AnalysisModel model, Map<Integer, CDNA_Library> cDNAMap, Integer runId, Map<Integer, Set<String>> totalCellsBySample, Map<String, String> sequenceMap) throws PipelineJobException
@@ -536,7 +547,7 @@ private Map<String, Object> processRow(AssayModel assayModel, AnalysisModel mode
         row.put("analysisId", model.getRowId());
         row.put("pipelineRunId", runId);
 
-        row.put("cloneId", assayModel.cloneId == null || assayModel.cloneId.contains("<>") ? null : assayModel.cloneId);
+        row.put("cloneId", assayModel.cloneId == null || assayModel.cloneId.contains("<>") ? assayModel.coalescedContigName : assayModel.cloneId);
         row.put("locus", assayModel.locus);
         row.put("vHit", assayModel.vHit);
         row.put("dHit", assayModel.dHit);
@@ -550,12 +561,12 @@ private Map<String, Object> processRow(AssayModel assayModel, AnalysisModel mode
         double fraction = (double)assayModel.barcodes.size() / totalCellsBySample.get(assayModel.cdna).size();
         row.put("fraction", fraction);
 
-        if (!sequenceMap.containsKey(assayModel.sequenceContigName))
+        if (!sequenceMap.containsKey(assayModel.coalescedContigName))
         {
-            throw new PipelineJobException("Unable to find sequence for: " + assayModel.sequenceContigName);
+            throw new PipelineJobException("Unable to find sequence for: " + assayModel.coalescedContigName);
         }
 
-        row.put("sequence", sequenceMap.get(assayModel.sequenceContigName));
+        row.put("sequence", sequenceMap.get(assayModel.coalescedContigName));
 
         return row;
     }