Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mGAP/resources/schemas/mgap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@
<table tableName="releaseTrackSubsets" tableDbType="TABLE" useColumnOrder="true">
<javaCustomizer class="org.labkey.ldk.query.DefaultTableCustomizer" />
<pkColumnName>rowid</pkColumnName>
<tableTitle>mGAP Release Track Sample Sets</tableTitle>
<tableTitle>mGAP Samples To Include Per Track</tableTitle>
<auditLogging>DETAILED</auditLogging>
<columns>
<column columnName="rowid">
Expand Down
35 changes: 34 additions & 1 deletion mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner>
{
public static final String GRCH37 = "genome37";
private static final String CLINVAR_VCF = "clinvar37";
private static final String DBNSFP_FILE = "dbnsfpFile";

public static final String CHAIN_FILE = "CHAIN_FILE";

public AnnotationStep(PipelineStepProvider provider, PipelineContext ctx)
public AnnotationStep(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new CassandraRunner(ctx.getLogger()));
}
Expand All @@ -67,6 +69,10 @@ public Provider()
{{
put("allowBlank", false);
}}, null),
ToolParameterDescriptor.createExpDataParam(DBNSFP_FILE, "dbNSFP Database (GRCh37)", "This is the DataId of the dbNSFP database (txt.gz file) using the GRCh37 genome.", "ldk-expdatafield", new JSONObject()
{{
put("allowBlank", false);
}}, null),
ToolParameterDescriptor.create(GRCH37, "GRCh37 Genome", "The genome that matches human GRCh37.", "ldk-simplelabkeycombo", new JSONObject()
{{
put("width", 400);
Expand Down Expand Up @@ -126,10 +132,21 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();

File clinvarVCF = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(CLINVAR_VCF).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
if (!clinvarVCF.exists())
{
throw new PipelineJobException("Unable to find file: " + clinvarVCF.getPath());
}

ReferenceGenome grch37Genome = getPipelineCtx().getSequenceSupport().getCachedGenome(getProvider().getParameterByName(GRCH37).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
Integer chainFileId = getPipelineCtx().getSequenceSupport().getCachedObject(CHAIN_FILE, Integer.class);
File chainFile = getPipelineCtx().getSequenceSupport().getCachedData(chainFileId);

File dbnsfpFile = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(DBNSFP_FILE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
if (!dbnsfpFile.exists())
{
throw new PipelineJobException("Unable to find file: " + dbnsfpFile.getPath());
}

getPipelineCtx().getLogger().info("processing file: " + inputVCF.getName());

ReferenceGenome originalGenome = getPipelineCtx().getSequenceSupport().getCachedGenome(genome.getGenomeId());
Expand Down Expand Up @@ -293,6 +310,22 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
output.addIntermediateFile(clinvarAnnotated);
output.addIntermediateFile(new File(clinvarAnnotated.getPath() + ".tbi"));

//annotate the ClinVar-annotated VCF with SnpSift, using the dbNSFP database file
getPipelineCtx().getLogger().info("annotating with SnpSift");
File snpSiftAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".snpSift.vcf.gz");
if (forceRecreate || !indexExists(snpSiftAnnotated))
{
SnpSiftWrapper ssRunner = new SnpSiftWrapper(getPipelineCtx().getLogger());
ssRunner.runSnpSift(dbnsfpFile, clinvarAnnotated, snpSiftAnnotated);
}
else
{
getPipelineCtx().getLogger().info("resuming with existing file: " + snpSiftAnnotated.getPath());
}
output.addOutput(snpSiftAnnotated, "VCF Annotated With SnpSift");
output.addIntermediateFile(snpSiftAnnotated);
output.addIntermediateFile(new File(snpSiftAnnotated.getPath() + ".tbi"));

//annotate with Cassandra
getPipelineCtx().getLogger().info("annotating with Cassandra");
String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cassandra";
Expand Down
Loading