Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions aligness instructions.rtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{\rtf1\ansi\ansicpg1252\cocoartf2821
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\fnil\fcharset0 Menlo-Regular;}
{\colortbl;\red255\green255\blue255;\red0\green0\blue0;\red255\green255\blue255;\red0\green0\blue0;
}
{\*\expandedcolortbl;;\csgenericrgb\c0\c0\c0\c85000;\csgenericrgb\c100000\c100000\c100000;\csgray\c0;
}
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0

\f0\fs24 \cf0 #From nextflow results directory\
\
\pard\tx593\pardeftab593\partightenfactor0

\f1 \cf2 \cb3 git clone https://github.com/nagadhia/alignESS.git\
cd alignESS\
\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\partightenfactor0

\fs22 \cf4 \cb1 \CocoaLigature0 conda create -n aligness python=3.9\
conda env update -n aligness --file conda_env_simple.yml\
\
mkdir aligness_outputs\
gunzip ../picrust/picrust2-2.5.2/EC_predicted.tsv\
\
#1st arg is a file where EC predictions are the columns for each ASV, 2nd arg is output file\
python aligness_preprocess.py ../picrust/picrust2-2.5.2/EC_predicted.tsv aligness_strings.txt\
\
python alignESS.py dbalign aligness_strings.txt -nproc 8 -t 1\
\
#Creates similarity matrix - 1st arg is the alignESS output file, 2nd arg is the DADA2 ASV table\
python aligness_to_matrix.py output.txt ../../../dada2/ASV_table.tsv \
\
}
55 changes: 55 additions & 0 deletions aligness_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pandas as pd
import sys

def _ec_prefix(column_name):
    """Return the first three EC levels of a column name, or None if unparseable.

    A name such as ``"EC:2.1.3.5"`` becomes ``"2.1.3"``. Names that do not
    consist of exactly four dot-separated fields yield ``None``.
    """
    parts = str(column_name).split(".")
    if len(parts) != 4 or not parts[0]:
        return None
    # The first field may carry a prefix such as "EC:"; only its last
    # character is kept as the top-level EC class.
    return f"{parts[0][-1]}.{parts[1]}.{parts[2]}"


def process_ec_data(input_filepath, output_filepath):
    """
    Processes EC data from a TSV file and writes the results to a text file.

    The input must have a "sequence" column holding one identifier per row;
    every other column is treated as an EC number whose value is positive
    when that function is predicted for the row. For each row, the EC
    numbers with a positive value are truncated to three levels (e.g.
    ``2.1.3``), de-duplicated, sorted and joined with ":". One line
    ``<sequence>\\t<ec strings>`` is written per input row, in input order.

    Args:
    input_filepath (str): Path to the input TSV file.
    output_filepath (str): Path to the output text file.

    Raises:
    KeyError: If the input has no "sequence" column.
    ValueError: If a column with a positive value is not a four-level
    EC number.
    """
    # Read and prepare output of picrust
    function_per_species_df = pd.read_csv(input_filepath, sep="\t")
    abundance_df = function_per_species_df.drop("sequence", axis=1)
    abundances = abundance_df.to_numpy()

    # Take the names from the same frame as the values so the two stay
    # aligned wherever the "sequence" column sits, and parse each name once.
    column_names = list(abundance_df.columns)
    prefixes = [_ec_prefix(name) for name in column_names]

    # Build every output line before touching the output file, so a parsing
    # error does not leave a partially written result behind.
    lines = []
    for sp_id, row in zip(function_per_species_df["sequence"], abundances):
        present = set()
        for j in (row > 0).nonzero()[0]:
            prefix = prefixes[j]
            if prefix is None:
                raise ValueError(
                    f"Column {column_names[j]!r} is not a four-level EC number"
                )
            present.add(prefix)
        # Sorting makes the output reproducible; plain set iteration order
        # changes between runs. Pairing each id with its own row also keeps
        # duplicated sequence ids from shifting later lines.
        lines.append(f"{sp_id}\t{':'.join(sorted(present))}\n")

    # Write strings into txt file with strings of each species in a new line
    with open(output_filepath, mode="w", encoding="utf-8") as myfile:
        myfile.writelines(lines)

if __name__ == "__main__":
    # The script takes exactly two positional arguments after its own name:
    # the input TSV path and the output text path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python script.py <input_filepath> <output_filepath>")
        sys.exit(1)

    input_filepath, output_filepath = cli_args
    process_ec_data(input_filepath, output_filepath)
75 changes: 75 additions & 0 deletions aligness_strings.txt

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
17 changes: 17 additions & 0 deletions conda_env_simple.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: ess-env
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- python=3.10
- pip
- numpy
- pandas
- matplotlib
- seaborn
- scipy
- biopython
- scikit-learn


Loading