-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAPI_code.py
More file actions
88 lines (77 loc) · 3.44 KB
/
API_code.py
File metadata and controls
88 lines (77 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import requests
import csv
from Bio import Entrez
# Retrieve gene information from KEGG (Kyoto Encyclopedia of Genes and Genomes),
# a resource for understanding the functions of biological systems from genome
# sequencing and other high-throughput technologies.
def get_kegg_gene_info(organism_code, timeout=30):
    """Fetch the KEGG gene list for an organism.

    Sends a GET request to the KEGG REST ``list`` endpoint for
    *organism_code* and splits the response body into lines.

    Args:
        organism_code: Value substituted into the ``/list/`` URL path
            (presumably a KEGG organism code; confirm against the caller).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A list with one string per line of the response body, or an empty
        list when the request fails or the status code is not 200.
    """
    url = f"http://rest.kegg.jp/list/{organism_code}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Network-level failures (DNS, refused connection, timeout) are
        # reported the same way as a non-200 response instead of crashing.
        print(f"Failed to retrieve gene information from KEGG for {organism_code}: {err}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve gene information from KEGG for {organism_code}")  # error message
        return []
    return response.text.splitlines()
# Function to retrieve gene annotations from KEGG
def get_kegg_gene_annotations(gene_id, timeout=30):
    """Fetch the KEGG entry text for a single gene.

    Sends a GET request to the KEGG REST ``get`` endpoint for *gene_id*.

    Args:
        gene_id: Value substituted into the ``/get/`` URL path.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The response body as a string, or an empty string when the request
        fails or the status code is not 200.
    """
    url = f"http://rest.kegg.jp/get/{gene_id}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Network-level failures (DNS, refused connection, timeout) are
        # reported the same way as a non-200 response instead of crashing.
        print(f"Failed to retrieve annotations from KEGG for {gene_id}: {err}")
        return ""
    if response.status_code != 200:
        print(f"Failed to retrieve annotations from KEGG for {gene_id}")  # error message
        return ""
    return response.text
#retrieve gene information from NCBI Entrez
def get_ncbi_gene_info(term, db="gene", retmax=10):
    """Search an NCBI Entrez database and return the matching record IDs.

    Args:
        term: Search term passed to ``Entrez.esearch``.
        db: Name of the Entrez database to search.
        retmax: Maximum number of IDs requested from the search.

    Returns:
        The ``"IdList"`` entry of the parsed search result.
    """
    # Placeholder: insert a contact e-mail address here. This assignment
    # sets the module-wide Entrez.email used by later Entrez calls.
    Entrez.email = ""  # insert_email
    handle = Entrez.esearch(db=db, term=term, retmax=retmax)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the response raises.
        handle.close()
    return record["IdList"]
#retrieve gene name and annotations from NCBI Entrez
def get_ncbi_gene_details(gene_id):
    """Fetch one gene record from NCBI Entrez and extract name and annotations.

    The record is fetched from the ``gene`` database as XML and parsed with
    ``Entrez.read``. Only the first element of the parsed result is inspected.

    Args:
        gene_id: Identifier passed to ``Entrez.efetch`` as ``id``.

    Returns:
        A ``(gene_name, annotations)`` tuple of strings.

        * ``gene_name`` is the ``"Gene-commentary_label"`` of the first
          product of the first ``"Entrezgene_locus"`` entry, or ``""`` when
          any part of that path is missing or empty.
        * ``annotations`` is every ``"Gene-commentary_comment"`` value found
          under ``"Entrezgene_commentary"``, joined by newlines and stripped
          of surrounding whitespace; ``""`` when there are none.
    """
    handle = Entrez.efetch(db="gene", id=gene_id, rettype="xml")
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the response raises.
        handle.close()

    if not record:
        return "", ""
    entry = record[0]

    # Gene name: label of the first product of the first locus entry.
    gene_name = ""
    if "Entrezgene_locus" in entry:
        gene_locus = entry["Entrezgene_locus"]
        if gene_locus:
            products = gene_locus[0].get("Gene-commentary_products")
            # A present-but-empty products list used to raise IndexError;
            # treat it like a missing list and leave the name empty.
            if products:
                gene_name = products[0].get("Gene-commentary_label", "")

    # Annotations: collect the comment text of every commentary entry.
    comment_texts = []
    if "Entrezgene_commentary" in entry:
        for comment in entry["Entrezgene_commentary"] or ():
            if "Gene-commentary_comment" in comment:
                comment_texts.append(comment["Gene-commentary_comment"])
    annotations = "\n".join(comment_texts).strip()

    return gene_name, annotations
# Usage: fill in the three placeholders below (search term, output file name,
# organism code) before running the script.
term = ""  # search term
ncbi_gene_ids = get_ncbi_gene_info(term)  # IDs of the NCBI records matching the term

# Write gene information and annotations to a tab-separated file.
output_file = ""  # output file name
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile, delimiter="\t")
    writer.writerow(["Source", "Gene ID", "Gene Name", "Annotations"])  # header row

    # KEGG: one row per line of the organism's gene list.
    organism_code = " "  # organism code of choice here
    kegg_gene_info = get_kegg_gene_info(organism_code)
    for line in kegg_gene_info:
        if not line.strip():
            # A blank line has no gene ID; skip it rather than issue a
            # pointless request for an empty identifier.
            continue
        parts = line.split("\t")
        gene_id = parts[0]
        gene_name = parts[1] if len(parts) > 1 else ""
        annotations = get_kegg_gene_annotations(gene_id)
        writer.writerow(["KEGG", gene_id, gene_name, annotations])

    # NCBI Entrez: one row per gene ID returned by the search.
    # The previous branching compared a single character of the annotation
    # text with the gene name and wrote the same row in both branches, which
    # silently dropped some genes; every fetched gene is now written, with an
    # empty Annotations column when none were found.
    for gene_id in ncbi_gene_ids:
        gene_name, annotations = get_ncbi_gene_details(gene_id)
        writer.writerow(["NCBI", gene_id, gene_name, annotations])

print(f"Data has been written to {output_file}")  # completion message