From d1ffbea4bc8205288715d58b596bbd95eeab4b3f Mon Sep 17 00:00:00 2001 From: birsenyrtds Date: Mon, 7 Apr 2025 17:49:05 +0300 Subject: [PATCH 1/3] Move demo_pcanlysis.py into pcaprocess directory --- pcaprocess/demo_pcanlysis.py | 93 ++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 pcaprocess/demo_pcanlysis.py diff --git a/pcaprocess/demo_pcanlysis.py b/pcaprocess/demo_pcanlysis.py new file mode 100644 index 0000000..10a0607 --- /dev/null +++ b/pcaprocess/demo_pcanlysis.py @@ -0,0 +1,93 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Define the PCA functions in the same file or import them correctly +# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +# Generate random gene expression data +data = np.random.rand(n_cells, n_genes) + +# Create an AnnData object +adata = sc.AnnData(X=data) + +# Adding cell types (obs) +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA graph +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) From 8ff1efba7d650d78ba9580f6dff6618883bc42b4 Mon Sep 17 00:00:00 2001 From: birsenyrtds Date: Tue, 8 Apr 2025 15:54:59 +0300 Subject: [PATCH 2/3] Add last PCA process files --- SingleCellWebApp | 1 + demo_pcanlysis.py | 93 ----------------------------------------------- 2 files changed, 1 insertion(+), 93 deletions(-) create mode 160000 SingleCellWebApp delete mode 100644 demo_pcanlysis.py diff --git a/SingleCellWebApp b/SingleCellWebApp new file mode 160000 index 0000000..a9ae786 --- /dev/null +++ b/SingleCellWebApp @@ -0,0 +1 @@ +Subproject commit a9ae7869927e2230f9e97f2990892d72ad2bb246 diff --git a/demo_pcanlysis.py b/demo_pcanlysis.py deleted file mode 100644 index 10a0607..0000000 --- a/demo_pcanlysis.py +++ /dev/null @@ -1,93 +0,0 @@ -import scanpy as sc -import numpy as np -import matplotlib.pyplot as plt - -# Define the PCA functions in the same file or import them correctly -# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata - -# Step 1: Create random data -n_cells, n_genes = 100, 2000 -np.random.seed(42) - -# Generate random gene expression data -data = np.random.rand(n_cells, n_genes) - -# Create an AnnData object -adata = sc.AnnData(X=data) - -# Adding cell types (obs) -adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] -adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] - -# Step 2: Apply PCA -def run_pca(adata, n_comps=50, svd_solver='arpack'): - """Runs PCA and stores the computed components.""" - try: - if 'X_pca' in adata.obsm: - print("PCA already computed. Overwriting previous results...") - - print(f"Running PCA with {n_comps} components using {svd_solver} solver...") - sc.pp.normalize_total(adata, target_sum=1e4) # Normalization - sc.pp.log1p(adata) # Log transformation - sc.pp.scale(adata) # Scaling - sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) - - print("PCA completed.") - except Exception as e: - print(f"Error during PCA: {e}") - raise - -run_pca(adata, n_comps=10, svd_solver='arpack') - -# Step 3: Plot PCA graph -def plot_pca(adata, color=None): - """Plots PCA results, colored by a specified attribute (if provided).""" - try: - print(f"Plotting PCA, color by: {color or 'default'}") - sc.pl.pca(adata, color=color, show=False) - plt.title(f"PCA - Colored by {color if color else 'default'}") - plt.show() - except KeyError: - print(f"Warning: '{color}' not found. Using default coloring.") - sc.pl.pca(adata, show=False) - plt.title("PCA - Default Coloring") - plt.show() - except Exception as e: - print(f"Error in PCA plot: {e}") - raise - -plot_pca(adata, color='cell_type') - -# Step 4: Plot explained variance -def plot_variance(adata, log=True): - """Plots the variance explained by PCA components.""" - try: - print("Plotting explained variance...") - sc.pl.pca_variance_ratio(adata, log=log, show=False) - plt.title("PCA: Explained Variance") - plt.show() - except Exception as e: - print(f"Error in variance plot: {e}") - raise - -plot_variance(adata) - -# Step 5: Save PCA results -def save_results(adata, results_file="pca_results.h5ad"): - """Saves the PCA results to an H5AD file.""" - try: - print(f"Saving results to {results_file}...") - adata.write(results_file) - print("Save successful.") - except Exception as e: - print(f"Error saving results: {e}") - raise - -save_results(adata, "pca_results.h5ad") - -# Step 6: Retrieve processed AnnData object -def get_adata(adata): - """Returns the processed AnnData object.""" - return adata - -processed_adata = get_adata(adata) From dd1d19f88e41ccd608d98391bc37332b312c1bc6 Mon Sep 17 00:00:00 2001 From: birsenyrtds Date: Sat, 24 May 2025 00:38:24 +0300 Subject: [PATCH 3/3] Update src folder with content from pcaprocess --- src/demo_pcanlysis.py | 93 +++++++++++++++++++++++++++++++++++ src/modules/demo_pcanlysis.py | 78 +++++++++++++++++++++++++++++ src/modules/qc.py | 52 ++++++++++++++++++++ src/pca_analysis.py | 60 ++++++++++++++++++++++ 4 files changed, 283 insertions(+) create mode 100644 src/demo_pcanlysis.py create mode 100644 src/modules/demo_pcanlysis.py create mode 100644 src/pca_analysis.py diff --git a/src/demo_pcanlysis.py b/src/demo_pcanlysis.py new file mode 100644 index 0000000..10a0607 --- /dev/null +++ b/src/demo_pcanlysis.py @@ -0,0 +1,93 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Define the PCA functions in the same file or import them correctly +# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +# Generate random gene expression data +data = np.random.rand(n_cells, n_genes) + +# Create an AnnData object +adata = sc.AnnData(X=data) + +# Adding cell types (obs) +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA graph +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) diff --git a/src/modules/demo_pcanlysis.py b/src/modules/demo_pcanlysis.py new file mode 100644 index 0000000..2e16cf3 --- /dev/null +++ b/src/modules/demo_pcanlysis.py @@ -0,0 +1,78 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +data = np.random.rand(n_cells, n_genes) +adata = sc.AnnData(X=data) + +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + try: + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + sc.pp.scale(adata) + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + print("PCA completed.") + except Exception as e: + raise Exception(f"PCA sırasında hata oluştu: {e}") + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + raise Exception(f"'{color}' özelliği bulunamadı, renklemek için geçersiz.") + except Exception as e: + raise Exception(f"PCA görselleştirmesinde hata oluştu: {e}") + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + raise Exception(f"Varyans grafiğinde hata oluştu: {e}") + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + raise Exception(f"Sonuçları kaydederken hata oluştu: {e}") + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) diff --git a/src/modules/qc.py b/src/modules/qc.py index e69de29..bca2052 100644 --- a/src/modules/qc.py +++ b/src/modules/qc.py @@ -0,0 +1,52 @@ +import scanpy as sc +import matplotlib.pyplot as plt + +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + try: + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + sc.pp.scale(adata) + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + print("PCA completed.") + except Exception as e: + raise Exception(f"PCA işleminde hata oluştu: {e}") + +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + raise Exception(f"Belirtilen renk özelliği '{color}' bulunamadı.") + except Exception as e: + raise Exception(f"PCA grafiğinde hata oluştu: {e}") + +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + raise Exception(f"Varyans grafiğinde hata oluştu: {e}") + +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + raise Exception(f"Sonuçlar kaydedilirken hata oluştu: {e}") + +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata \ No newline at end of file diff --git a/src/pca_analysis.py b/src/pca_analysis.py new file mode 100644 index 0000000..4760724 --- /dev/null +++ b/src/pca_analysis.py @@ -0,0 +1,60 @@ +import scanpy as sc +import matplotlib.pyplot as plt + +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata