diff --git a/README.md b/README.md index b393bbe..6bb402c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,15 @@ # SingleCellWebApp Bioinformatic technique team project +Here are the outputs we got from running our code. + +![PHOTO-2025-05-23-22-18-02](https://github.com/user-attachments/assets/052b7fae-295a-41b5-8f4c-9efb99e6365b) +Above is the output from the rank genes function. + +![PHOTO-2025-05-23-22-32-16](https://github.com/user-attachments/assets/f7fe0897-e9b4-4412-be4a-c63479aa43b9) +This one is the violin plot output. + +![PHOTO-2025-05-23-22-52-52](https://github.com/user-attachments/assets/62fcd331-dd72-4e4a-930a-e8698a6f8d01) +This one is from the marker genes code. + +![PHOTO-2025-05-23-21-32-06](https://github.com/user-attachments/assets/30361fa4-fdc4-43cf-83a1-15c57454cbc1) +Lastly, this is the output we got from the get scores and p-values function. diff --git a/data/pbmc3k_raw.h5ad b/data/pbmc3k_raw.h5ad new file mode 100644 index 0000000..460ee0b Binary files /dev/null and b/data/pbmc3k_raw.h5ad differ diff --git a/rank_genes_violin_demo.py b/rank_genes_violin_demo.py new file mode 100644 index 0000000..879a1f0 --- /dev/null +++ b/rank_genes_violin_demo.py @@ -0,0 +1,17 @@ +from rank_genes_violin import get_rank_genes_groups_violin +import scanpy as sc + +#sample dataset +adata = sc.datasets.pbmc3k() + +# calculating neighborhoods +sc.pp.neighbors(adata) + +sc.tl.leiden(adata, resolution=1.0) + +# Differential expression analysis (with t-test) +sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test') + + +# calling the function and visualizing +get_rank_genes_groups_violin(adata, groups='0', n_genes=8) diff --git a/src/modules/SingleCellWebApp b/src/modules/SingleCellWebApp new file mode 160000 index 0000000..655ed2d --- /dev/null +++ b/src/modules/SingleCellWebApp @@ -0,0 +1 @@ +Subproject commit 655ed2d37d10c4c3830633da388db467fe845571 diff --git a/src/modules/get_marker_genes_demo.py b/src/modules/get_marker_genes_demo.py new file mode 100644
"""Demo: preprocess PBMC3k, cluster with Leiden, rank genes per cluster, save."""
import scanpy as sc

# NOTE(review): neither the module `rank_genes_group` nor the name
# `rank_genes_groups_custom` is defined in the changes visible here (the
# sibling module is `rankgenesgroup`, exposing `rank_genes_groups`) --
# confirm this import resolves before relying on the demo.
from rank_genes_group import rank_genes_groups_custom

# Bind the loaded dataset. The original discarded the return value of
# pbmc3k(), so the first use of `adata` below raised NameError.
adata = sc.datasets.pbmc3k()

# Normalise, log-transform, keep highly variable genes, then scale.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# .copy() makes the gene subset an independent object rather than a view of
# the full dataset before it is scaled in place.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)

# PCA and neighborhoods
sc.pp.pca(adata, svd_solver='arpack')  # this line resolves the warning
sc.pp.neighbors(adata, n_pcs=40)

# Clustering (Leiden)
sc.tl.leiden(adata, resolution=1.0)

sc.settings.verbosity = 2

# Rank genes per Leiden cluster, then save the annotated object.
results_file = "pbmc3k_rank_genes.h5ad"
rank_genes_groups_custom(
    adata, groupby='leiden', method='t-test', n_genes=25, sharey=False
)
adata.write(results_file)
def get_scores_and_pvals(adata, n_top=5):
    """Return the top-ranked gene names and p-values for every group.

    Reads ``adata.uns['rank_genes_groups']`` and builds two columns per
    group: ``<group>_n`` with the gene names and ``<group>_p`` with the
    p-values. Despite the function's name, the frame holds gene *names*,
    not scores.

    Parameters
    ----------
    adata
        Object whose ``uns`` mapping contains a ``'rank_genes_groups'``
        entry with ``'names'`` and ``'pvals'`` structured arrays that have
        one field per group.
    n_top
        Number of leading rows to keep. Defaults to 5, the value that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ordered group by group (``0_n, 0_p, 1_n, 1_p, ...``),
        truncated to the first ``n_top`` rows.

    Raises
    ------
    KeyError
        If ``'rank_genes_groups'`` is missing from ``adata.uns``.
    """
    try:
        result = adata.uns['rank_genes_groups']
    except KeyError:
        # Same exception type as before, but say what the caller must do.
        raise KeyError(
            "adata.uns has no 'rank_genes_groups' entry; run "
            "sc.tl.rank_genes_groups(adata, ...) first"
        ) from None

    # The field names of the structured 'names' array are the group labels.
    groups = result['names'].dtype.names

    columns = {}
    for group in groups:
        for key in ('names', 'pvals'):
            # key[:1] yields the 'n' / 'p' column suffix.
            columns[group + '_' + key[:1]] = result[key][group]
    return pd.DataFrame(columns).head(n_top)
"""Demo: preprocess PBMC3k, cluster with Leiden, then run rank_genes_groups."""
import scanpy as sc

# NOTE: importing rankgenesgroup also executes that module's own top-level
# pipeline (it loads, clusters, ranks and writes its own copy of the data).
from rankgenesgroup import rank_genes_groups

adata = sc.datasets.pbmc3k()

# Normalise, log-transform, keep highly variable genes, then scale.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# .copy() makes the gene subset an independent object rather than a view of
# the full dataset before it is scaled in place.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)

# PCA and neighborhoods
sc.pp.pca(adata, svd_solver='arpack')  # this line resolves the warning
sc.pp.neighbors(adata, n_pcs=40)

# Clustering (Leiden)
sc.tl.leiden(adata, resolution=1.0)

sc.settings.verbosity = 2

# Use the imported helper instead of calling sc.tl.rank_genes_groups with
# `sharey=False`: `sharey` is a plotting option (sc.pl.rank_genes_groups) and
# had no effect on the t-test. The helper ranks the genes, draws the plot
# with `sharey` applied, and writes the result to "pbmc3k_rank_genes.h5ad".
rank_genes_groups(adata, method='t-test', n_genes=25, sharey=False)
def rank_genes_groups(adata, method='t-test', n_genes=25, sharey=False,
                      groupby='leiden', output_file=None):
    """Rank genes per group, plot the top markers, and save the result.

    Runs ``sc.tl.rank_genes_groups`` on ``adata``, plots the ranking with
    ``sc.pl.rank_genes_groups``, and writes ``adata`` to disk.

    Parameters
    ----------
    adata
        Annotated data object; it is modified in place by the ranking step.
    method
        Statistical test passed to ``sc.tl.rank_genes_groups``.
    n_genes
        Number of top genes shown per group in the plot.
    sharey
        Whether the plot panels share a y-axis (passed to the plot call).
    groupby
        Key in ``adata.obs`` that defines the groups. Defaults to
        ``'leiden'``, the value that was previously hard-coded.
    output_file
        Path the object is written to. When ``None`` (default) the
        module-level ``results_file`` is used, as before.

    Raises
    ------
    KeyError
        If ``groupby`` is not a column of ``adata.obs``.
    """
    if groupby not in adata.obs:
        raise KeyError(
            f"{groupby!r} not found in adata.obs; run the clustering step "
            "(e.g. sc.tl.leiden) before ranking genes"
        )

    sc.tl.rank_genes_groups(adata, groupby=groupby, method=method)
    sc.pl.rank_genes_groups(adata, n_genes=n_genes, sharey=sharey)

    # Fall back to the module-level default path to stay backward compatible.
    target = results_file if output_file is None else output_file
    adata.write(target)