diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml index 51d926474..5f75392b1 100644 --- a/mGAP/resources/schemas/mgap.xml +++ b/mGAP/resources/schemas/mgap.xml @@ -689,7 +689,7 @@ rowid - mGAP Release Track Sample Sets + mGAP Samples To Include Per Track DETAILED diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java index 1a9622b2e..693e576d4 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java @@ -51,9 +51,11 @@ public class AnnotationStep extends AbstractCommandPipelineStep { public static final String GRCH37 = "genome37"; private static final String CLINVAR_VCF = "clinvar37"; + private static final String DBNSFP_FILE = "dbnsfpFile"; + public static final String CHAIN_FILE = "CHAIN_FILE"; - public AnnotationStep(PipelineStepProvider provider, PipelineContext ctx) + public AnnotationStep(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new CassandraRunner(ctx.getLogger())); } @@ -67,6 +69,10 @@ public Provider() {{ put("allowBlank", false); }}, null), + ToolParameterDescriptor.createExpDataParam(DBNSFP_FILE, "dbNSFP Database (GRCh37)", "This is the DataId of the dbNSFP database (txt.gz file) using the GRCh37 genome.", "ldk-expdatafield", new JSONObject() + {{ + put("allowBlank", false); + }}, null), ToolParameterDescriptor.create(GRCH37, "GRCh37 Genome", "The genome that matches human GRCh37.", "ldk-simplelabkeycombo", new JSONObject() {{ put("width", 400); @@ -126,10 +132,21 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); File clinvarVCF = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(CLINVAR_VCF).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class)); + if (!clinvarVCF.exists()) + { + throw new PipelineJobException("Unable to find file: " + clinvarVCF.getPath()); + } + ReferenceGenome grch37Genome = getPipelineCtx().getSequenceSupport().getCachedGenome(getProvider().getParameterByName(GRCH37).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class)); Integer chainFileId = getPipelineCtx().getSequenceSupport().getCachedObject(CHAIN_FILE, Integer.class); File chainFile = getPipelineCtx().getSequenceSupport().getCachedData(chainFileId); + File dbnsfpFile = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(DBNSFP_FILE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class)); + if (!dbnsfpFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + dbnsfpFile.getPath()); + } + getPipelineCtx().getLogger().info("processing file: " + inputVCF.getName()); ReferenceGenome originalGenome = getPipelineCtx().getSequenceSupport().getCachedGenome(genome.getGenomeId()); @@ -293,6 +310,22 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno output.addIntermediateFile(clinvarAnnotated); output.addIntermediateFile(new File(clinvarAnnotated.getPath() + ".tbi")); + //annotate with SnpSift + getPipelineCtx().getLogger().info("annotating with SnpSift"); + File snpSiftAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".snpSift.vcf.gz"); + if (forceRecreate || !indexExists(snpSiftAnnotated)) + { + SnpSiftWrapper ssRunner = new SnpSiftWrapper(getPipelineCtx().getLogger()); + ssRunner.runSnpSift(dbnsfpFile, clinvarAnnotated, snpSiftAnnotated); + } + else + { + getPipelineCtx().getLogger().info("resuming with existing file: " + snpSiftAnnotated.getPath()); + } + output.addOutput(snpSiftAnnotated, "VCF Annotated With SnpSift"); + output.addIntermediateFile(snpSiftAnnotated); + output.addIntermediateFile(new File(snpSiftAnnotated.getPath() + ".tbi")); + //annotate with cassandra getPipelineCtx().getLogger().info("annotating with Cassandra"); String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cassandra"; diff --git a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java index 84b5e5e29..0a49a2109 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java @@ -6,7 +6,9 @@ import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.CompareType; import org.labkey.api.data.Container; @@ -31,6 +33,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; @@ -50,6 +53,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; public class GenerateMgapTracksStep extends AbstractPipelineStep implements VariantProcessingStep @@ -71,7 +75,10 @@ public static class Provider extends AbstractVariantProcessingStepProvider sampleIdToMgapAlias = getSampleToAlias(so.getFile()); // Now read track list, validate IDs present, and write to file: TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACK_SUBSETS); TableSelector ts = new TableSelector(ti, PageFlowUtil.set("trackName", "subjectId")); + Set requestedNotInVcf = new HashSet<>(); Map> trackToSubject = new HashMap<>(); ts.forEachResults(rs -> { if (!trackToSubject.containsKey(rs.getString(FieldKey.fromString("trackName")))) @@ -108,12 +116,17 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List { writer.writeNext(new String[]{trackName, x}); }); @@ -136,15 +150,60 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List intervals) throws PipelineJobException { VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); - Map> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true))); + VCFHeader header; + try (VCFFileReader reader = new VCFFileReader(inputVCF)) + { + header = reader.getFileHeader(); + } + + if (!header.hasInfoLine("mGAPV")) + { + throw new IllegalStateException("VCF is missing the annotation: mGAPV"); + } + + int idx = 0; for (String trackName : trackToSamples.keySet()) { + idx++; + getPipelineCtx().getJob().setStatus(PipelineJob.TaskStatus.running, "Processing track " + idx + " of " + trackToSamples.size()); + File vcf = processTrack(inputVCF, trackName, trackToSamples.get(trackName), outputDirectory, genome, intervals); - output.addSequenceOutput(vcf, trackName, TRACK_CATEGORY, null, null, genome.getGenomeId(), "Total Samples: " + trackToSamples.get(trackName).size()); + output.addSequenceOutput(vcf, trackName, TRACK_CATEGORY, null, null, genome.getGenomeId(), "mGAP track: " + trackName + ", total samples: " + trackToSamples.get(trackName).size()); } + // Also create the Novel Sites track: + String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + if (releaseVersion.toLowerCase().startsWith("v")) + { + releaseVersion = releaseVersion.substring(1); + } + + if (!NumberUtils.isCreatable(releaseVersion)) + { + throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion); + } + + File novelSitesOutput = new File(outputDirectory, "mGAP_v" + releaseVersion + "_NovelSites.vcf.gz"); + if (new File(novelSitesOutput.getPath() + ".tbi").exists()) + { + getPipelineCtx().getLogger().debug("Index exists, will not remake novel sites VCF"); + } + else + { + getPipelineCtx().getJob().setStatus(PipelineJob.TaskStatus.running, "Processing novel sites track"); + + SelectVariantsWrapper sv = new SelectVariantsWrapper(getPipelineCtx().getLogger()); + List svArgs = new ArrayList<>(); + svArgs.add("-select"); + svArgs.add("mGAPV == '" + releaseVersion + "'"); + sv.execute(genome.getWorkingFastaFile(), inputVCF, novelSitesOutput, svArgs); + } + + getPipelineCtx().getJob().getLogger().info("total variants: " + SequenceAnalysisService.get().getVCFLineCount(novelSitesOutput, getPipelineCtx().getJob().getLogger(), false)); + output.addSequenceOutput(novelSitesOutput, "Novel Sites in This Release", TRACK_CATEGORY, null, null, genome.getGenomeId(), "These are novel sites in mGAP v" + releaseVersion); + return output; } @@ -159,45 +218,62 @@ public void complete(PipelineJob job, List inputs, List newRow = new CaseInsensitiveHashMap<>(); - newRow.put("trackName", so.getName()); - newRow.put("label", so.getName()); - newRow.put("vcfId", so.getRowid()); - newRow.put("isprimarytrack", false); - - BatchValidationException bve = new BatchValidationException(); - releaseTracks.getUpdateService().insertRows(job.getUser(), targetContainer, Arrays.asList(newRow), bve, null, null); - if (bve.hasErrors()) - { - throw bve; - } - } - else - { - job.getLogger().debug("Updating existing track: " + so.getName()); - Map toUpdate = new CaseInsensitiveHashMap<>(); - toUpdate.put("rowId", ts.getObject(Integer.class)); - toUpdate.put("vcfId", so.getRowid()); + createOrUpdateTrack(so, job); + } - Map oldKeys = new CaseInsensitiveHashMap<>(); - toUpdate.put("rowId", ts.getObject(Integer.class)); + createOrUpdatePrimaryTrack(inputs.get(0), job); + } - releaseTracks.getUpdateService().updateRows(job.getUser(), targetContainer, Arrays.asList(toUpdate), Arrays.asList(oldKeys), null, null); + private void createOrUpdatePrimaryTrack(SequenceOutputFile so, PipelineJob job) throws PipelineJobException + { + createOrUpdateTrack(so, job, "mGAP Release", true); + } + + private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job) throws PipelineJobException + { + createOrUpdateTrack(so, job, so.getName(), false); + } + + private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job, String trackName, boolean isPrimaryTrack) throws PipelineJobException + { + try + { + Container targetContainer = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer(); + TableInfo releaseTracks = QueryService.get().getUserSchema(job.getUser(), targetContainer, mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACKS); + TableSelector ts = new TableSelector(releaseTracks, PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("trackName"), so.getName()), null); + if (!ts.exists()) + { + job.getLogger().debug("Creating new track: " + so.getName()); + Map newRow = new CaseInsensitiveHashMap<>(); + newRow.put("trackName", trackName); + newRow.put("label", trackName); + newRow.put("vcfId", so.getRowid()); + newRow.put("isprimarytrack", isPrimaryTrack); + + BatchValidationException bve = new BatchValidationException(); + releaseTracks.getUpdateService().insertRows(job.getUser(), targetContainer, Arrays.asList(newRow), bve, null, null); + if (bve.hasErrors()) + { + throw bve; } } - catch (QueryUpdateServiceException | SQLException | BatchValidationException | DuplicateKeyException | InvalidKeyException e) + else { - throw new PipelineJobException(e); + job.getLogger().debug("Updating existing track: " + so.getName()); + Map toUpdate = new CaseInsensitiveHashMap<>(); + toUpdate.put("rowId", ts.getObject(Integer.class)); + toUpdate.put("vcfId", so.getRowid()); + + Map oldKeys = new CaseInsensitiveHashMap<>(); + toUpdate.put("rowId", ts.getObject(Integer.class)); + + releaseTracks.getUpdateService().updateRows(job.getUser(), targetContainer, Arrays.asList(toUpdate), Arrays.asList(oldKeys), null, null); } } + catch (QueryUpdateServiceException | SQLException | BatchValidationException | DuplicateKeyException | InvalidKeyException e) + { + throw new PipelineJobException(e); + } } private boolean indexExists(File vcf) @@ -252,7 +328,7 @@ private File processTrack(File currentVCF, String trackName, List sample private Map> parseSampleMap(File sampleMapFile) throws PipelineJobException { - Map> ret = new HashMap<>(); + Map> ret = new TreeMap<>(); try (CSVReader reader = new CSVReader(Readers.getReader(sampleMapFile), '\t')) { String[] line; @@ -277,7 +353,8 @@ private Map> parseSampleMap(File sampleMapFile) throws Pipe private Map getSampleToAlias(File input) throws PipelineJobException { - Map sampleNameMap = new HashMap<>(); + Map trueIdToMgapId = new HashMap<>(); + Map mGapIdToTrueId = new HashMap<>(); try { SequenceAnalysisService.get().ensureVcfIndex(input, getPipelineCtx().getLogger()); @@ -290,27 +367,32 @@ private Map getSampleToAlias(File input) throws PipelineJobExcep try (VCFFileReader reader = new VCFFileReader(input)) { VCFHeader header = reader.getFileHeader(); - List subjects = header.getSampleNamesInOrder(); - if (subjects.isEmpty()) + List allMgapIds = header.getSampleNamesInOrder(); + if (allMgapIds.isEmpty()) { return Collections.emptyMap(); } - Set sampleNames = new HashSet<>(header.getSampleNamesInOrder()); - getPipelineCtx().getLogger().info("total samples in input VCF: " + sampleNames.size()); + getPipelineCtx().getLogger().info("total samples in input VCF: " + allMgapIds.size()); // validate all VCF samples are using aliases: - querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("externalAlias"), subjects, CompareType.IN)); - - List missingSamples = new ArrayList<>(sampleNames); - missingSamples.removeAll(sampleNameMap.keySet()); + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("externalAlias"), allMgapIds, CompareType.IN); + TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_ANIMAL_MAPPING); + TableSelector ts = new TableSelector(ti, PageFlowUtil.set("subjectname", "externalAlias"), new SimpleFilter(filter), null); + ts.forEachResults(rs -> { + trueIdToMgapId.put(rs.getString(FieldKey.fromString("subjectname")), rs.getString(FieldKey.fromString("externalAlias"))); + mGapIdToTrueId.put(rs.getString(FieldKey.fromString("externalAlias")), rs.getString(FieldKey.fromString("subjectname"))); + }); + + List missingSamples = new ArrayList<>(allMgapIds); + missingSamples.removeAll(mGapIdToTrueId.keySet()); if (!missingSamples.isEmpty()) { throw new PipelineJobException("The following samples in this VCF do not match known mGAP IDs: " + StringUtils.join(missingSamples, ", ")); } // Now ensure we dont have duplicate mappings: - List translated = new ArrayList<>(header.getSampleNamesInOrder().stream().map(sampleNameMap::get).toList()); + List translated = new ArrayList<>(header.getSampleNamesInOrder().stream().map(mGapIdToTrueId::get).toList()); Set unique = new HashSet<>(); List duplicates = translated.stream().filter(o -> !unique.add(o)).toList(); if (!duplicates.isEmpty()) @@ -319,13 +401,6 @@ private Map getSampleToAlias(File input) throws PipelineJobExcep } } - return sampleNameMap; - } - - private void querySampleBatch(final Map sampleNameMap, SimpleFilter filter) - { - TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_ANIMAL_MAPPING); - TableSelector ts = new TableSelector(ti, PageFlowUtil.set("subjectname", "externalAlias"), new SimpleFilter(filter), null); - ts.forEachResults(rs -> sampleNameMap.put(rs.getString(FieldKey.fromString("subjectname")), rs.getString(FieldKey.fromString("externalAlias")))); + return trueIdToMgapId; } } diff --git a/mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java b/mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java index 89f4c648f..527817b36 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java @@ -2,7 +2,6 @@ import au.com.bytecode.opencsv.CSVReader; import au.com.bytecode.opencsv.CSVWriter; -import com.google.common.collect.Lists; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.Interval; import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; @@ -14,6 +13,7 @@ import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.Nullable; +import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.CompareType; import org.labkey.api.data.Results; import org.labkey.api.data.Selector; @@ -51,7 +51,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; public class RenameSamplesForMgapStep extends AbstractPipelineStep implements VariantProcessingStep { @@ -253,7 +252,7 @@ private Map getSamplesToAlias(File input) throws PipelineJobExce getPipelineCtx().getLogger().info("total samples in input VCF: " + sampleNames.size()); // Pass 1: match on proper ID: - querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("subjectname"), subjects, CompareType.IN)); + querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("subjectname"), subjects, CompareType.IN), subjects); // Pass 2: add others using otherNames: List missingSamples = new ArrayList<>(sampleNames); @@ -261,7 +260,7 @@ private Map getSamplesToAlias(File input) throws PipelineJobExce if (!missingSamples.isEmpty()) { getPipelineCtx().getLogger().debug("Querying " + missingSamples.size() + " samples using otherNames field"); - querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("otherNames"), missingSamples, CompareType.CONTAINS_ONE_OF)); + querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("otherNames"), missingSamples, CompareType.CONTAINS_ONE_OF), subjects); } getPipelineCtx().getLogger().info("total sample names to alias: " + sampleNameMap.size()); @@ -285,8 +284,13 @@ private Map getSamplesToAlias(File input) throws PipelineJobExce return sampleNameMap; } - private void querySampleBatch(Map sampleNameMap, SimpleFilter filter) + private void querySampleBatch(Map sampleNameMap, SimpleFilter filter, List sampleNames) { + final Map subjectToOrigCase = new CaseInsensitiveHashMap<>(); + sampleNames.forEach(x -> { + subjectToOrigCase.put(x, x); + }); + TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_ANIMAL_MAPPING); TableSelector ts = new TableSelector(ti, PageFlowUtil.set("subjectname", "externalAlias", "otherNames"), new SimpleFilter(filter), null); ts.forEachResults(new Selector.ForEachBlock() @@ -294,7 +298,13 @@ private void querySampleBatch(Map sampleNameMap, SimpleFilter fi @Override public void exec(Results rs) throws SQLException { - sampleNameMap.put(rs.getString(FieldKey.fromString("subjectname")), rs.getString(FieldKey.fromString("externalAlias"))); + String subjectId = rs.getString(FieldKey.fromString("subjectname")); + if (subjectToOrigCase.containsKey(subjectId) && !subjectToOrigCase.get(subjectId).equals(subjectId)) + { + subjectId = subjectToOrigCase.get(subjectId); + } + + sampleNameMap.put(subjectId, rs.getString(FieldKey.fromString("externalAlias"))); if (rs.getObject(FieldKey.fromString("otherNames")) != null) { @@ -315,6 +325,11 @@ public void exec(Results rs) throws SQLException throw new IllegalStateException("Improper data in mgap.aliases table. Dual/conflicting aliases: " + name + ": " + rs.getString(FieldKey.fromString("externalAlias")) + " / " + sampleNameMap.get(name)); } + if (subjectToOrigCase.containsKey(name) && !subjectToOrigCase.get(name).equals(name)) + { + name = subjectToOrigCase.get(name); + } + getPipelineCtx().getLogger().debug("Adding otherName: " + name); sampleNameMap.put(name, rs.getString(FieldKey.fromString("externalAlias"))); } diff --git a/mGAP/src/org/labkey/mgap/pipeline/SnpSiftWrapper.java b/mGAP/src/org/labkey/mgap/pipeline/SnpSiftWrapper.java new file mode 100644 index 000000000..4944aa9ee --- /dev/null +++ b/mGAP/src/org/labkey/mgap/pipeline/SnpSiftWrapper.java @@ -0,0 +1,93 @@ +package org.labkey.mgap.pipeline; + +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.Logger; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.PipelineJobService; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by bimber on 8/24/2016. + */ +public class SnpSiftWrapper extends AbstractCommandWrapper +{ + public SnpSiftWrapper(Logger log) + { + super(log); + } + + public void runSnpSift(File dbnsfpFile, File input, File output) throws PipelineJobException + { + getLogger().info("Annotating VCF with SnpSift"); + + List params = new ArrayList<>(); + params.add(SequencePipelineService.get().getJavaFilepath()); + params.addAll(SequencePipelineService.get().getJavaOpts()); + params.add("-jar"); + params.add(getSnpSiftJar().getPath()); + params.add("dbnsfp"); + params.add("-db"); + params.add(dbnsfpFile.getPath()); + + params.add("-noStats"); + params.add("-nodownload"); + + params.add(input.getPath()); + + File unzippedVcf = new File(getOutputDir(output), "snpSift.vcf"); + execute(params, unzippedVcf); + + if (!unzippedVcf.exists()) + { + throw new PipelineJobException("output not found: " + unzippedVcf.getName()); + } + + unzippedVcf = SequenceAnalysisService.get().bgzipFile(unzippedVcf, getLogger()); + try + { + if (!unzippedVcf.equals(output)) + { + if (output.exists()) + { + getLogger().debug("deleting pre-existing output file: " + output.getPath()); + output.delete(); + } + FileUtils.moveFile(unzippedVcf, output); + } + SequenceAnalysisService.get().ensureVcfIndex(output, getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private File getJarDir() + { + String path = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("SNPEFFPATH"); + if (path != null) + { + return new File(path, "snpEff"); + } + + path = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath(SequencePipelineService.SEQUENCE_TOOLS_PARAM); + if (path == null) + { + path = PipelineJobService.get().getAppProperties().getToolsDirectory(); + } + + return path == null ? new File("snpEff") : new File(path, "snpEff"); + } + + public File getSnpSiftJar() + { + return new File(getJarDir(), "SnpSift.jar"); + } +} diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java index 1e245b47f..49dbb5c3f 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java @@ -1,6 +1,7 @@ package org.labkey.mgap.pipeline; import htsjdk.samtools.util.Interval; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.Logger; import org.json.JSONObject; import org.labkey.api.data.SimpleFilter; @@ -81,6 +82,16 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno getPipelineCtx().getLogger().info("Annotating VCF by mGAP Release"); String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, "0.0"); + if (releaseVersion.toLowerCase().startsWith("v")) + { + releaseVersion = releaseVersion.substring(1); + } + + if (!NumberUtils.isCreatable(releaseVersion)) + { + throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion); + } + String priorReleaseLabel = getPipelineCtx().getSequenceSupport().getCachedObject(PRIOR_RELEASE_LABEL, String.class); int sitesOnlyExpDataId = getPipelineCtx().getSequenceSupport().getCachedObject(SITES_ONLY_DATA, Integer.class); File sitesOnlyVcf = getPipelineCtx().getSequenceSupport().getCachedData(sitesOnlyExpDataId); diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java index f6c01d588..de1b00c8d 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java +++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java @@ -931,7 +931,7 @@ private void checkVcfAnnotationsAndSamples(File vcfInput, boolean skipAnnotation if (!skipAnnotationChecks) { - for (String info : Arrays.asList("CADD_PH", "OMIMN", "CLN_ALLELE", "AF")) + for (String info : Arrays.asList("CADD_PH", "OMIMN", "CLN_ALLELE", "AF", "mGAPV")) { if (!header.hasInfoLine(info)) { @@ -1617,6 +1617,20 @@ private void maybeWriteVariantLine(Set> queuedLines, VariantContext } } + if (af != null && !NumberUtils.isCreatable(af.toString())) + { + log.error("Non-numeric AF: " + vc.getContig() + " " + vc.getStart() + ". " + vc.getAttributeAsString("AF", "")); + } + else if (af != null) + { + double afNumber = Double.parseDouble(af.toString()); + if (afNumber == 0.0) + { + log.error("Found record with AF=0: " + vc.getContig() + " " + vc.getStart() + ". " + vc.getAttributeAsString("AF", "")); + return; + } + } + Object cadd = vc.getAttribute("CADD_PH"); queuedLines.add(Arrays.asList(vc.getContig(), String.valueOf(vc.getStart()), vc.getReference().getDisplayString(), allele, source, reason, (description == null ? "" : description), StringUtils.join(overlappingGenes, ";"), StringUtils.join(omims, ";"), StringUtils.join(omimds, ";"), af == null ? "" : af.toString(), identifier == null ? "" : identifier, cadd == null ? "" : cadd.toString())); diff --git a/mcc/resources/queries/study/demographics.query.xml b/mcc/resources/queries/study/demographics.query.xml index 7cdf65a8c..7e2e2457b 100644 --- a/mcc/resources/queries/study/demographics.query.xml +++ b/mcc/resources/queries/study/demographics.query.xml @@ -143,20 +143,22 @@ Dam + ALWAYS_OFF study animal - id + Id Sire + ALWAYS_OFF study animal - id + Id diff --git a/mcc/resources/web/mcc/window/MarkShippedWindow.js b/mcc/resources/web/mcc/window/MarkShippedWindow.js index 14245b422..a1d042600 100644 --- a/mcc/resources/web/mcc/window/MarkShippedWindow.js +++ b/mcc/resources/web/mcc/window/MarkShippedWindow.js @@ -37,10 +37,23 @@ Ext4.define('MCC.window.MarkShippedWindow', { html: 'This will:
1) Mark the selected animals as shipped from this center
2) Enter a new demographics record in the selected study
3) Preserve the MCC ID for each animal.', border: false, style: 'padding-bottom: 10px;' + },{ + xtype: 'checkbox', + fieldLabel: 'Animal Will Use Previous Id', + itemId: 'usePreviousId', + listeners: { + scope: this, + change: function (field, val) { + var target = field.up('panel').down('#newId'); + target.allowBlank = !!val; + target.setVisible(!val); + } + }, },{ xtype: 'textfield', fieldLabel: 'New ID (blank if unchanged)', - itemId: 'newId' + itemId: 'newId', + allowBlank: false },{ xtype: 'datefield', fieldLabel: 'Effective Date', @@ -121,6 +134,12 @@ Ext4.define('MCC.window.MarkShippedWindow', { return; } + if (!win.down('#usePreviousId').getValue() && !win.down('#newId').getValue()) { + Ext4.Msg.hide(); + Ext4.Msg.alert('Error', 'Must enter the new ID'); + return; + } + var targetFolderId = win.down('#targetFolder').store.findRecord('Path', targetFolder).get('EntityId'); Ext4.Msg.wait('Saving...'); LABKEY.Query.selectRows({ @@ -139,7 +158,6 @@ Ext4.define('MCC.window.MarkShippedWindow', { var row = results.rows[0]; var newId = win.down('#newId').getValue() || row.Id; - var commands = []; var shouldAddDeparture = !row['Id/MostRecentDeparture/MostRecentDeparture'] || row['Id/MostRecentDeparture/MostRecentDeparture'] !== Ext4.Date.format(row.effectiveDate, 'Y-m-d') || row.Id !== newId; diff --git a/mcc/src/client/GeneticsPlot/GeneticsPlot.tsx b/mcc/src/client/GeneticsPlot/GeneticsPlot.tsx index e91369432..33d0fd419 100644 --- a/mcc/src/client/GeneticsPlot/GeneticsPlot.tsx +++ b/mcc/src/client/GeneticsPlot/GeneticsPlot.tsx @@ -1,15 +1,26 @@ import React, { useEffect, useState } from 'react'; -import { getServerContext, Query } from '@labkey/api'; -import { TSV } from 'tsv'; +import { ActionURL, Filter, getServerContext, Query } from '@labkey/api'; import './../Dashboard/dashboard.css'; import ScatterChart from './ScatterChart'; import { ErrorBoundary } from '@labkey/components'; import { Box, Tab, Tabs } from '@material-ui/core'; import KinshipTable from './KinshipTable'; + +function GenomeBrowser(props: {jbrowseId: any}) { + const { jbrowseId } = props; + + return ( +
+ Click here to view Marmoset SNP data in the genome browser +
+ ); +} + export function GeneticsPlot() { const [pcaData, setPcaData] = useState([]); const [kinshipData, setKinshipData] = useState([]); + const [jbrowseId, setJBrowseId] = useState(null); const [value, setValue] = React.useState(0); const ctx = getServerContext().getModuleContext('mcc') || {}; @@ -31,6 +42,25 @@ export function GeneticsPlot() { scope: this }); + Query.selectRows({ + containerPath: containerPath, + schemaName: 'jbrowse', + queryName: 'databases', + columns: 'objectid', + filterArray: [ + Filter.create('name', 'Marmoset Variant Data') + ], + success: function(results) { + const data = results.rows + setJBrowseId(data.length ? data[0].objectid : null) + }, + failure: function(response) { + alert('There was an error loading JBrowse data'); + console.log(response); + }, + scope: this + }); + Query.selectRows({ containerPath: containerPath, schemaName: 'study', @@ -87,6 +117,7 @@ export function GeneticsPlot() { +
@@ -95,6 +126,7 @@ export function GeneticsPlot() {
{value === 0 && } {value === 1 && } + {value === 2 && }
diff --git a/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java b/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java index e481c2254..638af85df 100644 --- a/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java +++ b/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java @@ -3,6 +3,7 @@ import org.apache.xmlbeans.XmlException; import org.jetbrains.annotations.NotNull; import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.collections.CaseInsensitiveHashSet; import org.labkey.api.data.CompareType; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; @@ -94,7 +95,7 @@ private void populateForField(PipelineJob job, TableInfo sourceTi, String fieldN TableSelector ts = new TableSelector(sourceTi, PageFlowUtil.set(originalIdField, "container"), filter, null); if (ts.exists()) { - Set idsEncountered = new HashSet<>(); + Set idsEncountered = new CaseInsensitiveHashSet(); ts.forEachResults(rs -> { Container c = ContainerManager.getForId(rs.getString(FieldKey.fromString("container"))); List> rows = toAdd.containsKey(c) ? toAdd.get(c) : new ArrayList<>(); diff --git a/mcc/test/src/org/labkey/test/tests/mcc/MccTest.java b/mcc/test/src/org/labkey/test/tests/mcc/MccTest.java index 056fade17..a8d35cad8 100644 --- a/mcc/test/src/org/labkey/test/tests/mcc/MccTest.java +++ b/mcc/test/src/org/labkey/test/tests/mcc/MccTest.java @@ -116,6 +116,13 @@ private void testAnimalImportAndTransfer() throws Exception Ext4ComboRef.getForLabel(this, "Target Folder").setComboByDisplayValue("Other"); waitAndClick(Ext4Helper.Locators.ext4Button("Submit")); + // This should fail initially: + new Window.WindowFinder(getDriver()).withTitle("Error").waitFor(); + waitAndClick(Ext4Helper.Locators.ext4Button("OK")); + + Ext4FieldRef.getForLabel(this, "Animal Will Use Previous Id").setChecked(true); + waitAndClick(Ext4Helper.Locators.ext4Button("Submit")); + new Window.WindowFinder(getDriver()).withTitle("Success").waitFor(); waitAndClickAndWait(Ext4Helper.Locators.ext4Button("OK")); @@ -162,6 +169,7 @@ private void testAnimalImportAndTransfer() throws Exception sleep(100); Ext4ComboRef.getForLabel(this, "Target Folder").setComboByDisplayValue("Other"); + Ext4FieldRef.getForLabel(this, "Animal Will Use Previous Id").setChecked(true); waitAndClick(Ext4Helper.Locators.ext4Button("Submit")); new Window.WindowFinder(getDriver()).withTitle("Success").waitFor();