Skip to content
This repository was archived by the owner on Oct 29, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.cloud.genomics</groupId>
<artifactId>gatk-tools-java</artifactId>
<version>1.0</version>
</dependency>
</dependencies>

<profiles>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.google.cloud.genomics.dataflow.readers.bam;

import com.google.api.client.util.Maps;
import com.google.api.services.genomics.model.CigarUnit;
import com.google.api.services.genomics.model.LinearAlignment;
import com.google.api.services.genomics.model.Position;
Expand All @@ -23,15 +24,12 @@
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Lists;

import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.*;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We prefer to avoid wildcard imports (Google Java style)

import htsjdk.samtools.util.SequenceUtil;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
* Converts SAMRecords to Reads.
Expand Down Expand Up @@ -62,7 +60,7 @@ public static final Read makeRead(final SAMRecord record) {
read.setId(record.getReadName()); // TODO: make more unique
read.setFragmentName(record.getReadName());
read.setReadGroupId(getAttr(record, "RG"));
read.setNumberReads(record.getReadPairedFlag() ? 1 : 2);
read.setNumberReads(record.getReadPairedFlag() ? 2 : 1);
read.setProperPlacement(record.getReadPairedFlag() && record.getProperPairFlag());
if (!record.getReadUnmappedFlag() && record.getAlignmentStart() > 0) {
LinearAlignment alignment = new LinearAlignment();
Expand Down Expand Up @@ -97,7 +95,7 @@ public CigarUnit apply(CigarElement c) {
read.setAlignment(alignment);
}
read.setDuplicateFragment(record.getDuplicateReadFlag());
read.setFragmentLength(record.getReadLength());
read.setFragmentLength(record.getInferredInsertSize());
if (record.getReadPairedFlag()) {
if (record.getFirstOfPairFlag()) {
read.setReadNumber(0);
Expand Down Expand Up @@ -126,6 +124,12 @@ public CigarUnit apply(CigarElement c) {
read.setAlignedQuality(readBaseQualities);
}

Map<String, List<String>> attributes = Maps.newHashMap();
for( SAMRecord.SAMTagAndValue tagAndValue: record.getAttributes()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'for (' rather than 'for( '

attributes.put(tagAndValue.tag, Lists.newArrayList(tagAndValue.value.toString()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment explaining the problem with multiple values?

}
read.setInfo(attributes);

return read;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
package com.google.cloud.genomics.dataflow.readers.bam;

import com.google.api.services.genomics.model.Read;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import org.junit.Before;
import com.google.cloud.genomics.gatk.common.GenomicsConverter;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import htsjdk.samtools.SAMRecord;
import java.io.File;
import java.io.IOException;

import static org.junit.Assert.assertEquals;

@RunWith(JUnit4.class)
public class ReadConverterTest {
Expand All @@ -41,5 +38,22 @@ public void testConversion() {
assertEquals("chr20", read.getNextMatePosition().getReferenceName());
assertEquals((Boolean)true, read.getNextMatePosition().getReverseStrand());
}


/**
 * Round-trip check: every record in the conversion_test.sam fixture is converted to a
 * {@link Read} with {@link ReadConverter#makeRead} and back to a {@link SAMRecord} with
 * {@link GenomicsConverter#makeSAMRecord}, and the resulting SAM text must match the
 * text of the record that was read from the file.
 *
 * @throws IOException if the fixture cannot be read or the reader fails to close
 */
@Test
public void SamToReadToSamTest() throws IOException {
  String filePath =
      "src/test/resources/com/google/cloud/genomics/dataflow/readers/bam/conversion_test.sam";
  File samInput = new File(filePath);

  int numReads = 0;
  // try-with-resources so the underlying file handle is released even when an
  // assertion fails part-way through the file.
  try (SamReader reads = SamReaderFactory.makeDefault().open(samInput)) {
    SAMFileHeader header = reads.getFileHeader();
    for (SAMRecord sam : reads) {
      Read read = ReadConverter.makeRead(sam);
      SAMRecord newSam = GenomicsConverter.makeSAMRecord(read, header);
      // JUnit order is (expected, actual): the record from the file is the expectation.
      assertEquals(sam.getSAMString(), newSam.getSAMString());
      numReads++;
    }
  }
  // Sanity check to make sure we actually read the file.
  assertEquals(19, numReads);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
@HD VN:1.0 SO:coordinate
@SQ SN:chr1 LN:101
@SQ SN:chr2 LN:101
@SQ SN:chr3 LN:101
@SQ SN:chr4 LN:101
@SQ SN:chr5 LN:101
@SQ SN:chr6 LN:101
@SQ SN:chr7 LN:202
@SQ SN:chr8 LN:202
@RG ID:0 SM:Hi,Momma! LB:whatever PU:me PL:ILLUMINA
SL-XAV:1:1:0:764#0/1 89 chr1 1 255 101M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCAN &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0& RG:Z:0 NN:Z:Hello
SL-XAV:1:1:0:1668#0/2 153 chr2 1 255 101M * 0 0 CATCTCTACANGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATACTTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTNGACACCTTTN (/,'-/'0////(1'&&1&&&&'2''-6/,/3-33653.6:1'.86/-++32.-4864653/5/583/346423203+28888644446688456/4880& RG:Z:0 NN:Z:Goodbye
SL-XAV:1:1:0:1914#0/2 153 chr3 1 255 101M * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0
SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0
SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0
SL-XAV:1:1:0:700#0/2 137 chr6 1 255 101M * 0 0 NAATTGTTCTNAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACAATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACNAGTGTCGATC &0::887::::6/646::838388811/679:87640&./2+/-4/28:3,536/4''&&.78/(/554/./02*)*',-(57()&.6(6:(0601'/(,* RG:Z:0
SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 CAACAGAAGGNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCGAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
SL-XAV:1:1:0:105#0/2 403 chr7 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0
SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
SL-XAV:1:1:0:105#0/2 147 chr8 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0
SL-XAV:1:1:0:105#0/1 99 chr8 102 255 101M = 1 -79 NCAGGTTCAANTGTGCAGCCCNTTTTGAGAGATNNNNNNNNTGNNCTGNAANANNGACACAGCTATTCCTAAGATGACAAGATCAGANAANAAGTCAAGCA &06665578::41.*/7577/&/77403-324.&&&&&&&&/.&&..&&.0&&&&',:9:/-/(55002020+3'12+2/&.2-&//&),&*&&&&&&&51 RG:Z:0
SL-XAV:1:1:0:1300#0/1 77 * 0 0 * * 0 0 NAAACACAAGNNANAGTCTTANCNGCTATTCCNNNNNNNNNCTNNNCTNAGNANNACATACAACAGTATCCACACAAGTGTACTCGTNCANACATGTGAAC &*5535)*-,,&.&.*-1)*,&'&)&1&&.,)&&&&&&&&&)0&&&0'&&&&.&&*2'/4''0/**&)&,'-&*,&,&&&.0.&)&&&**&,.&&&')&&) RG:Z:0
SL-XAV:1:1:0:1300#0/2 141 * 0 0 * * 0 0 NGATCATGGANGACTCTCCCCATCCCCCGCTCCAGCGCTCAGTTATATGCCTAGCCTCGGACACGTCACCAACATCTCACGCACTCTGCANAGTCTCTCAC &&'+''3*&-/)/1'26/*-2-/542-*&-&/'/*/&-'&)-')&.'-/&&2+122*'&+,(/-&)((,/-,,.'2(2'+)/&/&-66-&&/16&)&*&'3 RG:Z:0
SL-XAV:1:1:0:1639#0/1 101 * 0 0 * chr1 1 0 NCCCTCTCAGNNTNTCTGCCANANCCTTAAGCNNNNNNNNNTANNNCTNAANCNNAAACTTTTGCCTCAGGCATCCGCAGAATGTTTNTCNGCCTATATCG &1::::::64/&/&0:3.280&/&087881,/&&&&&&&&&..&&&..&,,&-&&,265341-)/5680&-.5552-25/322/42/&)&&).421&-&-/ RG:Z:0
SL-XAV:1:1:0:1668#0/1 101 * 0 0 * chr2 1 0 NATAGCATACNNTNCATTGGANTNCAGCACAANNNNNNNNNTGNNNCANTNNANNCCTTTGAGATCGGAAGAGCGGTTCAGCAGGAANNCNCAGACCGATC &1988998890&0&.8863//&.&.0-2875.&&&&&&&&&.)&&&..&.&&.&&.5782-2+262)&-0-0510*.332-2.-,0*&&*&'.&-2-)0., RG:Z:0
SL-XAV:1:1:0:1914#0/1 101 * 0 0 * chr3 1 0 NTTTTTCTCCNNCNGTGCCTANTNTAGCCCCTNNNNNNNNNAANNNATNANNANNTTTACTTAAAAAACTGAAACTAGTAATGTGCANNANATCGNAAGAG &0::::<<;90&/&.244760&,&.414798/&&&&&&&&&00&&&0.&/&&-&&.4475687363504.&.557/.*)65.&/*./&&.&.+*)&..).& RG:Z:0
SL-XAV:1:1:0:68#0/1 581 * 0 0 * chr4 1 0 NAATATTCATNNGNTCAGCCTNTNCATTAATTNNNNNNNNNTTNNNATNATNANNTTTTTTATAACCATTTATAAATGAGAGAGATCNTANCACAATATCA &0<<:::::</&&&.73'290&.&0;:::90&&&&&&&&&&..&&&0)&0-&0&&&.743799995253348597921.,.'050.*&.0&)*)&&&&*). RG:Z:0
SL-XAV:1:1:0:700#0/1 581 * 0 0 * chr5 1 0 NGAAGCCCATNNTNGTGTTACNCNCCTGGAAGNNNNNNNNNACNNNGANACNTNNAACAATTCAGATCGGAAGAGCGGTTCAGCAGANNTNCCGAGACCGA &.88888:88/&0&,03189.&/&.8/))12/&&&&&&&&&./&&&&.&1.&)&&/35962/6432-3&),0&/2+0,),61&-6,&&&'&/,.0&...)0 RG:Z:0
SL-XAV:1:1:0:764#0/2 165 * 0 0 * chr6 1 0 NACAGATGCANATATTAACAGGCTTTAAAGGACAGATGGACTGCAATACAATAATAGAGTACGTCAACACTCCACAGATCGCTAGAGCATNACATCGGTGT &/:5358::9999::99998255::7275,,/5567-'+387537857:54-4.51'31059547320;73/720+22.4(6.;((.;(;8()(''&&2&& RG:Z:0