Skip to content
This repository was archived by the owner on Oct 29, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
<dependency>
<groupId>com.google.cloud.genomics</groupId>
<artifactId>google-genomics-utils</artifactId>
<version>v1beta2-0.37</version>
<version>v1beta2-0.38</version>
<exclusions>
<!-- Exclude an old version of guava which is being pulled
in by a transitive dependency google-api-client 1.19.0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
import java.util.HashMap;
import java.util.Map;

import com.google.api.services.genomics.model.Call;
import com.google.api.services.genomics.model.Variant;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.utils.CallFilters;
import com.google.cloud.genomics.dataflow.utils.PairGenerator;
import com.google.cloud.genomics.utils.VariantUtils;
import com.google.cloud.genomics.utils.grpc.VariantUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.genomics.v1.Variant;
import com.google.genomics.v1.VariantCall;

/**
* For each pair of calls on any of the given variants, computes a score, a number between 0 and 1
Expand Down Expand Up @@ -55,14 +55,14 @@ public void processElement(ProcessContext context) {
Variant variant = context.element();
CallSimilarityCalculator callSimilarityCalculator =
callSimilarityCalculatorFactory.get(isReferenceMajor(variant));
for (KV<Call, Call> pair : PairGenerator.WITHOUT_REPLACEMENT.allPairs(
for (KV<VariantCall, VariantCall> pair : PairGenerator.WITHOUT_REPLACEMENT.allPairs(
getSamplesWithVariant(variant), VariantUtils.CALL_COMPARATOR)) {
accumulateCallSimilarity(callSimilarityCalculator, pair.getKey(), pair.getValue());
}
}

private void accumulateCallSimilarity(CallSimilarityCalculator callSimilarityCalculator,
Call call1, Call call2) {
VariantCall call1, VariantCall call2) {
KV<String, String> callPair = KV.of(call1.getCallSetName(), call2.getCallSetName());
KV<Double, Integer> callPairAccumulation = accumulator.get(callPair);
if (callPairAccumulation == null) {
Expand All @@ -79,15 +79,15 @@ public void finishBundle(Context context) {
output(context, accumulator);
}

static ImmutableList<Call> getSamplesWithVariant(Variant variant) {
static ImmutableList<VariantCall> getSamplesWithVariant(Variant variant) {
return CallFilters.getSamplesWithVariantOfMinGenotype(variant, 0);
}

static boolean isReferenceMajor(Variant variant) {
int referenceAlleles = 0;
int alternateAlleles = 0;
for (Call call : variant.getCalls()) {
for (Integer i : call.getGenotype()) {
for (VariantCall call : variant.getCallsList()) {
for (Integer i : call.getGenotypeList()) {
if (i == 0) {
++referenceAlleles;
} else if (i > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
*/
package com.google.cloud.genomics.dataflow.functions;

import com.google.api.services.genomics.model.Call;
import com.google.genomics.v1.VariantCall;

public interface CallSimilarityCalculator {

double similarity(Call call1, Call call2);
double similarity(VariantCall call1, VariantCall call2);

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
package com.google.cloud.genomics.dataflow.functions;

import com.google.api.services.genomics.model.Call;
import com.google.genomics.v1.VariantCall;

/**
* See <a href="http://konradjkarczewski.files.wordpress.com/2012/02/identity-howto.pdf">this
Expand All @@ -27,15 +27,15 @@ public class SharedAllelesRatioCalculator implements CallSimilarityCalculator {
// TODO: Double-check that the following is the right way of computing the IBS
// scores when the number of alleles is different from 2 and when the genotypes are unphased.
@Override
public double similarity(Call call1, Call call2) {
int minNumberOfGenotypes = Math.min(call1.getGenotype().size(), call2.getGenotype().size());
public double similarity(VariantCall call1, VariantCall call2) {
int minNumberOfGenotypes = Math.min(call1.getGenotypeCount(), call2.getGenotypeCount());
int numberOfSharedAlleles = 0;
for (int i = 0; i < minNumberOfGenotypes; ++i) {
if (call1.getGenotype().get(i) == call2.getGenotype().get(i)) {
if (call1.getGenotype(i) == call2.getGenotype(i)) {
++numberOfSharedAlleles;
}
}
int maxNumberOfGenotypes = Math.max(call1.getGenotype().size(), call2.getGenotype().size());
int maxNumberOfGenotypes = Math.max(call1.getGenotypeCount(), call2.getGenotypeCount());
return (double) numberOfSharedAlleles / maxNumberOfGenotypes;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
*/
package com.google.cloud.genomics.dataflow.functions;

import com.google.api.services.genomics.model.Call;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.genomics.v1.VariantCall;

/**
* Computes the similarity of two calls based on whether they share a minor allele or not.
Expand All @@ -30,8 +30,8 @@ public SharedMinorAllelesCalculator(boolean isReferenceMajor) {
this.isReferenceMajor = isReferenceMajor;
}

private boolean hasMinorAllele(Call call) {
return Iterables.any(call.getGenotype(), new Predicate<Integer>() {
private boolean hasMinorAllele(VariantCall call) {
return Iterables.any(call.getGenotypeList(), new Predicate<Integer>() {

@Override
public boolean apply(Integer genotype) {
Expand All @@ -46,7 +46,7 @@ public boolean apply(Integer genotype) {
}

@Override
public double similarity(Call call1, Call call2) {
public double similarity(VariantCall call1, VariantCall call2) {
if (call1.getCallSetName().equals(call2.getCallSetName())) {
return 1.0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
import java.security.GeneralSecurityException;
import java.util.List;

import com.google.api.services.genomics.model.SearchVariantsRequest;
import com.google.api.services.genomics.model.Variant;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
Expand All @@ -32,14 +30,17 @@
import com.google.cloud.genomics.dataflow.functions.CallSimilarityCalculatorFactory;
import com.google.cloud.genomics.dataflow.functions.FormatIBSData;
import com.google.cloud.genomics.dataflow.functions.IBSCalculator;
import com.google.cloud.genomics.dataflow.functions.JoinNonVariantSegmentsWithVariants;
import com.google.cloud.genomics.dataflow.functions.grpc.JoinNonVariantSegmentsWithVariants;
import com.google.cloud.genomics.dataflow.readers.VariantReader;
import com.google.cloud.genomics.dataflow.readers.VariantStreamer;
import com.google.cloud.genomics.dataflow.utils.GenomicsDatasetOptions;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import com.google.cloud.genomics.dataflow.utils.IdentityByStateOptions;
import com.google.cloud.genomics.utils.GenomicsFactory;
import com.google.cloud.genomics.utils.ShardBoundary;
import com.google.cloud.genomics.utils.ShardUtils;
import com.google.genomics.v1.StreamVariantsRequest;
import com.google.genomics.v1.Variant;

/**
* A pipeline that computes Identity by State (IBS) for each pair of individuals in a dataset.
Expand All @@ -49,8 +50,8 @@
*/

public class IdentityByState {
private static final String VARIANT_FIELDS =
"nextPageToken,variants(start,calls(genotype,callSetName))";
// TODO: https://github.com/googlegenomics/utils-java/issues/48
private static final String VARIANT_FIELDS = "variants(start,calls(genotype,callSetName))";

public static void main(String[] args) throws IOException, GeneralSecurityException,
InstantiationException, IllegalAccessException {
Expand All @@ -62,26 +63,28 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept
GenomicsDatasetOptions.Methods.validateOptions(options);

GenomicsFactory.OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);
List<SearchVariantsRequest> requests = options.isAllReferences() ?
ShardUtils.getPaginatedVariantRequests(options.getDatasetId(), ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
List<StreamVariantsRequest> requests = options.isAllReferences() ?
ShardUtils.getVariantRequests(options.getDatasetId(), ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
options.getBasesPerShard(), auth) :
ShardUtils.getPaginatedVariantRequests(options.getDatasetId(), options.getReferences(), options.getBasesPerShard());
ShardUtils.getVariantRequests(options.getDatasetId(), options.getReferences(), options.getBasesPerShard());

Pipeline p = Pipeline.create(options);
p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);
PCollection<SearchVariantsRequest> input = p.begin().apply(Create.of(requests));
PCollection<Variant> variants = p.begin()
.apply(Create.of(requests))
.apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS));

PCollection<Variant> variants =
options.getHasNonVariantSegments()
PCollection<Variant> processedVariants;
if(options.getHasNonVariantSegments()) {
// Special handling is needed for data with non-variant segment records since IBS must
// take into account reference-matches in addition to the variants (unlike
// other analyses such as PCA).
? JoinNonVariantSegmentsWithVariants.joinVariantsTransform(input, auth,
JoinNonVariantSegmentsWithVariants.VARIANT_JOIN_FIELDS) : input.apply(ParDo.named(
VariantReader.class.getSimpleName()).of(
new VariantReader(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS)));
processedVariants = JoinNonVariantSegmentsWithVariants.joinVariantsTransform(variants);
} else {
processedVariants = variants;
}

variants
processedVariants
.apply(
ParDo.named(AlleleSimilarityCalculator.class.getSimpleName()).of(
new AlleleSimilarityCalculator(getCallSimilarityCalculatorFactory(options))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,41 @@
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

import java.util.Collections;
import java.util.List;

import org.hamcrest.CoreMatchers;
import org.junit.Test;

import com.google.api.services.genomics.model.Variant;
import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.utils.DataUtils;
import com.google.genomics.v1.Variant;

public class AlleleSimilarityCalculatorTest {

static final Variant snp1 = DataUtils.makeVariant("chr7", 200019, 200020, "T", Collections.singletonList("G"),
DataUtils.makeCall("het-alt sample", 1, 0), DataUtils.makeCall("hom-alt sample", 1, 1),
DataUtils.makeCall("hom-ref sample", 0, 0), DataUtils.makeCall("hom-nocall sample", -1, -1),
DataUtils.makeCall("ref-nocall sample", -1, 0));
static final Variant snp1 = Variant.newBuilder()
.setReferenceName("chr7")
.setStart(200019)
.setEnd(200020)
.setReferenceBases("T")
.addAlternateBases("G")
.addCalls(DataUtils.makeVariantCall("het-alt sample", 1, 0))
.addCalls(DataUtils.makeVariantCall("hom-alt sample", 1, 1))
.addCalls(DataUtils.makeVariantCall("hom-ref sample", 0, 0))
.addCalls(DataUtils.makeVariantCall("hom-nocall sample", -1, -1))
.addCalls(DataUtils.makeVariantCall("ref-nocall sample", -1, 0))
.build();

static final Variant snp2 = DataUtils.makeVariant("chr7", 200020, 200021, "C", Collections.singletonList("A"),
DataUtils.makeCall("hom-alt sample", 1, 1), DataUtils.makeCall("het-alt sample", 0, 1),
DataUtils.makeCall("ref-nocall sample", 0, -1));
static final Variant snp2 = Variant.newBuilder()
.setReferenceName("chr7")
.setStart(200020)
.setEnd(200021)
.setReferenceBases("C")
.addAlternateBases("A")
.addCalls(DataUtils.makeVariantCall("hom-alt sample", 1, 1))
.addCalls(DataUtils.makeVariantCall("het-alt sample", 0, 1))
.addCalls(DataUtils.makeVariantCall("ref-nocall sample", 0, -1))
.build();

@Test
public void testIsReferenceMajor() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import com.google.api.services.genomics.model.Call;
import com.google.api.services.genomics.model.Variant;
import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.utils.DataUtils;
import com.google.genomics.v1.Variant;
import com.google.genomics.v1.VariantCall;

@RunWith(JUnit4.class)
public class CallSimilarityCalculatorTest {

private static final double DELTA = 1e-6;

private List<Call> calls = newArrayList();
private List<VariantCall> calls = newArrayList();

private List<Variant> variants = newArrayList();

Expand All @@ -52,20 +52,20 @@ public class CallSimilarityCalculatorTest {

@Before
public void setUp() {
calls.add(DataUtils.makeCall(H1, 0, 0));
calls.add(DataUtils.makeCall(H2, 1, 0));
calls.add(DataUtils.makeCall(H3, 0, 1));
calls.add(DataUtils.makeCall(H2, 1, 1));
calls.add(DataUtils.makeCall(H3, 1, 1));
calls.add(DataUtils.makeCall(H2, 1));
calls.add(DataUtils.makeCall(H3, 0));
calls.add(DataUtils.makeCall(H2, 1, 0, 1));
calls.add(DataUtils.makeCall(H3, 1, 0, 0));

variants.add(DataUtils.makeSimpleVariant(calls.get(0), calls.get(1), calls.get(2)));
variants.add(DataUtils.makeSimpleVariant(calls.get(0), calls.get(3), calls.get(4)));
variants.add(DataUtils.makeSimpleVariant(calls.get(0), calls.get(5), calls.get(6)));
variants.add(DataUtils.makeSimpleVariant(calls.get(0), calls.get(7), calls.get(8)));
calls.add(DataUtils.makeVariantCall(H1, 0, 0));
calls.add(DataUtils.makeVariantCall(H2, 1, 0));
calls.add(DataUtils.makeVariantCall(H3, 0, 1));
calls.add(DataUtils.makeVariantCall(H2, 1, 1));
calls.add(DataUtils.makeVariantCall(H3, 1, 1));
calls.add(DataUtils.makeVariantCall(H2, 1));
calls.add(DataUtils.makeVariantCall(H3, 0));
calls.add(DataUtils.makeVariantCall(H2, 1, 0, 1));
calls.add(DataUtils.makeVariantCall(H3, 1, 0, 0));

variants.add(Variant.newBuilder().addCalls(calls.get(0)).addCalls(calls.get(1)).addCalls(calls.get(2)).build());
variants.add(Variant.newBuilder().addCalls(calls.get(0)).addCalls(calls.get(3)).addCalls(calls.get(4)).build());
variants.add(Variant.newBuilder().addCalls(calls.get(0)).addCalls(calls.get(5)).addCalls(calls.get(6)).build());
variants.add(Variant.newBuilder().addCalls(calls.get(0)).addCalls(calls.get(7)).addCalls(calls.get(8)).build());
}

@Test
Expand Down