Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
# SingleCellWebApp
Bioinformatic technique team project
Below are the outputs we got from running our code.

![PHOTO-2025-05-23-22-18-02](https://github.com/user-attachments/assets/052b7fae-295a-41b5-8f4c-9efb99e6365b)
Above is the output of the rank genes function.

![PHOTO-2025-05-23-22-32-16](https://github.com/user-attachments/assets/f7fe0897-e9b4-4412-be4a-c63479aa43b9)
This one is the violin plot output.

![PHOTO-2025-05-23-22-52-52](https://github.com/user-attachments/assets/62fcd331-dd72-4e4a-930a-e8698a6f8d01)
This one is the output of the marker genes code.

![PHOTO-2025-05-23-21-32-06](https://github.com/user-attachments/assets/30361fa4-fdc4-43cf-83a1-15c57454cbc1)
Lastly, this is the output we got from the `get_scores_and_pvals` function.
Binary file added data/pbmc3k_raw.h5ad
Binary file not shown.
17 changes: 17 additions & 0 deletions rank_genes_violin_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from rank_genes_violin import get_rank_genes_groups_violin
import scanpy as sc

# Load the PBMC 3k sample dataset through scanpy.
adata = sc.datasets.pbmc3k()

# Compute the neighborhood graph used by the clustering step below.
# NOTE(review): no normalization or explicit PCA call precedes this step —
# confirm that running on the dataset as loaded is intended.
sc.pp.neighbors(adata)

# Leiden clustering; the ranking step below groups cells by 'leiden'.
sc.tl.leiden(adata, resolution=1.0)

# Differential expression analysis (with t-test)
sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test')


# Call the imported helper to draw violin plots of the top 8 genes for group '0'.
get_rank_genes_groups_violin(adata, groups='0', n_genes=8)
1 change: 1 addition & 0 deletions src/modules/SingleCellWebApp
Submodule SingleCellWebApp added at 655ed2
18 changes: 18 additions & 0 deletions src/modules/get_marker_genes_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import scanpy as sc
import pandas as pd
from markedgenes import get_marker_genes # defined function in main code

# Demo dataset
adata = sc.datasets.pbmc3k()

# Preprocessing and clustering (same as in the main code)
sc.pp.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata)
sc.tl.leiden(adata)  # run Leiden clustering

# Rank genes between the Leiden groups
sc.tl.rank_genes_groups(adata, 'leiden', method='t-test')

# Call the function this demo imports. Calling any other name here would
# raise NameError, because get_marker_genes is the only helper imported.
# NOTE(review): assumes get_marker_genes takes the AnnData object as its only
# required argument — confirm against markedgenes.py.
marker_genes = get_marker_genes(adata)
print(marker_genes)
23 changes: 23 additions & 0 deletions src/modules/get_rank_genes_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from rank_genes_group import rank_genes_groups_custom
import scanpy as sc

# Load the demo dataset. The result must be bound to `adata`; every step
# below reads or modifies that name.
adata = sc.datasets.pbmc3k()

# Basic preprocessing
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Take a real copy of the highly-variable subset so the in-place steps that
# follow operate on an actual AnnData object rather than a view.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)

# PCA and neighborhoods
sc.pp.pca(adata, svd_solver='arpack')  # this line resolves the warning
sc.pp.neighbors(adata, n_pcs=40)

# Clustering (Leiden)
sc.tl.leiden(adata, resolution=1.0)

sc.settings.verbosity = 2

# Rank the genes with the imported helper, then save the results
results_file = "pbmc3k_rank_genes.h5ad"
rank_genes_groups_custom(adata, groupby='leiden', method='t-test', n_genes=25, sharey=False)
adata.write(results_file)
36 changes: 36 additions & 0 deletions src/modules/get_scors_and_pvals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import scanpy as sc
import pandas as pd
from rank_genes_group import rank_genes_groups

# NOTE(review): everything below is module-level code, so it also runs when
# another script imports get_scores_and_pvals from this module.

# Load the dataset
adata = sc.datasets.pbmc3k()

# Apply PCA to reduce dimensionality (to avoid high-dimension warning)
sc.pp.pca(adata, svd_solver='arpack')

# Calculate neighbors on PCA-reduced data
sc.pp.neighbors(adata)

# Run Leiden algorithm (with future-proofing for 'igraph' backend)
sc.tl.leiden(adata, resolution=1.0, flavor='igraph', directed=False, n_iterations=2)

# Run differential expression analysis to rank genes per 'leiden' group
sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test')


# Set scanpy's logging verbosity to level 2. This is assigned after the
# pipeline above has run, so it only affects later scanpy calls.
sc.settings.verbosity = 2

# Collect the top-ranked gene names and p-values for every group
def get_scores_and_pvals(adata, n_rows=5):
    """Return the top-ranked gene names and p-values per group.

    Despite the function name, only the ``'names'`` and ``'pvals'`` entries
    of the ranking results are read; test scores are not included.

    Parameters
    ----------
    adata
        Object whose ``uns['rank_genes_groups']`` mapping holds ``'names'``
        and ``'pvals'`` structured arrays with one field per group.
    n_rows
        Number of leading rows to keep. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        For each group, a ``<group>_n`` column (gene names) followed by a
        ``<group>_p`` column (p-values), truncated to the first ``n_rows``
        rows.

    Raises
    ------
    KeyError
        If ``adata.uns`` has no ``'rank_genes_groups'`` entry.
    """
    try:
        result = adata.uns['rank_genes_groups']
    except KeyError as err:
        raise KeyError(
            "adata.uns['rank_genes_groups'] is missing; "
            "run sc.tl.rank_genes_groups(adata, ...) first"
        ) from err

    # The field names of the structured 'names' array are the group labels.
    groups = result['names'].dtype.names
    columns = {
        # key[:1] abbreviates 'names' -> 'n' and 'pvals' -> 'p'.
        f"{group}_{key[:1]}": result[key][group]
        for group in groups
        for key in ('names', 'pvals')
    }
    return pd.DataFrame(columns).head(n_rows)

# Build the names/p-values table for the dataset prepared above and print it.
scores_and_pvals = get_scores_and_pvals(adata)
print(scores_and_pvals)

17 changes: 17 additions & 0 deletions src/modules/get_scors_and_pvals_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from get_scors_and_pvals import get_scores_and_pvals
import scanpy as sc
import pandas as pd

# Demo dataset
adata = sc.datasets.pbmc3k()

# PCA, neighborhood graph and Leiden clustering, then rank genes between
# the resulting 'leiden' groups with a t-test.
sc.pp.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata)
sc.tl.leiden(adata)
sc.tl.rank_genes_groups(adata, 'leiden', method='t-test')

# Call the imported function and print the table it returns.
scores_and_pvals = get_scores_and_pvals(adata)
print(scores_and_pvals)

16 changes: 16 additions & 0 deletions src/modules/pvals_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from get_scors_and_pvals import get_scores_and_pvals
import scanpy as sc
import pandas as pd

# Demo dataset
adata = sc.datasets.pbmc3k()

# PCA, neighborhood graph and Leiden clustering, then rank genes between
# the resulting 'leiden' groups with a t-test.
sc.pp.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata)
sc.tl.leiden(adata)
sc.tl.rank_genes_groups(adata, 'leiden', method='t-test')

# Call the imported function and print the table it returns.
scores_and_pvals = get_scores_and_pvals(adata)
print(scores_and_pvals)
24 changes: 24 additions & 0 deletions src/modules/rank_genes_group_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from rankgenesgroup import rank_genes_groups
import scanpy as sc

# Load the demo dataset
adata = sc.datasets.pbmc3k()

# Basic preprocessing
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Take a real copy of the highly-variable subset so the in-place steps that
# follow operate on an actual AnnData object rather than a view.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)

# PCA and neighborhoods
sc.pp.pca(adata, svd_solver='arpack')  # this line resolves the warning
sc.pp.neighbors(adata, n_pcs=40)

# Clustering (Leiden)
sc.tl.leiden(adata, resolution=1.0)

sc.settings.verbosity = 2

# Rank and plot through the wrapper imported from rankgenesgroup. `sharey`
# is a plotting option, so it belongs to the wrapper (which forwards it to
# sc.pl.rank_genes_groups) rather than to sc.tl.rank_genes_groups.
results_file = "pbmc3k_rank_genes.h5ad"
rank_genes_groups(adata, method='t-test', n_genes=25, sharey=False)

# Save the results
adata.write(results_file)
18 changes: 18 additions & 0 deletions src/modules/rank_genes_violin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import scanpy as sc
# NOTE(review): these are module-level statements, so they also run when a
# script imports get_rank_genes_groups_violin from this module.
# Load the PBMC 3k dataset, build the neighborhood graph and cluster it.
adata = sc.datasets.pbmc3k()
sc.pp.neighbors(adata)
sc.tl.leiden(adata, resolution=1.0)
sc.settings.verbosity = 2 # set scanpy's logging verbosity to level 2

def get_rank_genes_groups_violin(adata, groups='0', n_genes=8):
    """
    Draw violin plots of the top-ranked genes for the selected group(s).

    Parameters:
    - adata: AnnData object that already holds rank_genes_groups results
    - groups: group (cluster) label, or list of labels, to plot (default is '0')
    - n_genes: number of top genes to display in the plot (default is 8)

    Returns:
    - the same adata object that was passed in
    """
    plot_options = {'groups': groups, 'n_genes': n_genes}
    sc.pl.rank_genes_groups_violin(adata, **plot_options)
    return adata
17 changes: 17 additions & 0 deletions src/modules/rank_genes_violin_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from rank_genes_violin import get_rank_genes_groups_violin
import scanpy as sc

#sample dataset
adata = sc.datasets.pbmc3k()

# calculating neighborhoods
sc.pp.neighbors(adata)

sc.tl.leiden(adata, resolution=1.0)

# Differential expression analysis (with t-test)
sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test')


# calling the function and visualizing
get_rank_genes_groups_violin(adata, groups='0', n_genes=8)
36 changes: 36 additions & 0 deletions src/modules/rankgenesgroup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import scanpy as sc

# Load the sample dataset
adata = sc.datasets.pbmc3k()

# Basic preprocessing
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata = adata[:, adata.var.highly_variable]
sc.pp.scale(adata, max_value=10)

# PCA and neighborhood computation
sc.pp.pca(adata, svd_solver='arpack') # this line resolves the warning
sc.pp.neighbors(adata, n_pcs=40)

# Clustering (Leiden)
sc.tl.leiden(adata, resolution=1.0)

# Verbosity setting
sc.settings.verbosity = 2

# File name used for saving the results
results_file = "pbmc3k_rank_genes.h5ad"

# Gene ranking and visualization function
def rank_genes_groups(adata, method='t-test', n_genes=25, sharey=False, groupby='leiden'):
    """
    - Perform differential expression analysis, plot top marker genes, and save.

    Parameters:
    - adata: AnnData object; must already contain the `groupby` annotation
    - method: statistical test passed to sc.tl.rank_genes_groups (default is 't-test')
    - n_genes: number of top genes shown in the plot (default is 25)
    - sharey: passed to sc.pl.rank_genes_groups as its `sharey` option (default is False)
    - groupby: annotation key that defines the groups to compare
      (default is 'leiden', the value that used to be hard-coded)

    Side effects:
    - stores the ranking results in adata, draws the plot, and writes adata
      to the module-level `results_file` path.
    """
    sc.tl.rank_genes_groups(adata, groupby=groupby, method=method)
    sc.pl.rank_genes_groups(adata, n_genes=n_genes, sharey=sharey)
    # results_file is the module-level output file name.
    adata.write(results_file)

# Call the function with its default arguments on the dataset prepared above.
rank_genes_groups(adata)