From c960aab8002174d57215e86d041ee113db2518d6 Mon Sep 17 00:00:00 2001 From: jfnavarro Date: Wed, 1 Mar 2017 15:14:56 +0100 Subject: [PATCH 1/7] Importing the Cython function in a better way --- setup.py | 4 +++- stpipeline/common/clustering.py | 1 - stpipeline/common/distance.py | 19 ------------------- 3 files changed, 3 insertions(+), 21 deletions(-) delete mode 100755 stpipeline/common/distance.py diff --git a/setup.py b/setup.py index 8242c23..e0e2f13 100755 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ import sys from setuptools import setup, find_packages from stpipeline.version import version_number +from Cython.Build import cythonize # Get the long description from the relevant file here = os.path.abspath(os.path.dirname(__file__)) @@ -41,7 +42,8 @@ author_email = 'jose.fernandez.navarro@scilifelab.se', license = 'MIT', url = 'https://github.com/SpatialTranscriptomicsResearch/st_pipeline', - packages = find_packages(exclude=('tests*', 'utils')), + packages = find_packages(exclude=('tests*', 'utils', "*.pyx")), + ext_modules = cythonize("stpipeline/common/*.pyx"), include_package_data = False, package_data = {'': ['RELEASE-VERSION']}, zip_safe = False, diff --git a/stpipeline/common/clustering.py b/stpipeline/common/clustering.py index 7a8c683..b8de564 100755 --- a/stpipeline/common/clustering.py +++ b/stpipeline/common/clustering.py @@ -6,7 +6,6 @@ import numpy as np from scipy.cluster.hierarchy import linkage,fcluster from collections import defaultdict -import pyximport; pyximport.install() from stpipeline.common.cdistance import hamming_distance import random from collections import Counter diff --git a/stpipeline/common/distance.py b/stpipeline/common/distance.py deleted file mode 100755 index d5e835a..0000000 --- a/stpipeline/common/distance.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -This module contains some functions for computing distance between -sequences -""" - -def hamming_distance(s1, s2): - """ - Returns the Hamming distance between 
equal-length sequences. - :param s1: the first string/sequence - :param s1: the second string/sequence - :type s1: str - :type s2: str - :return: the number of number of different elements in both sequences - :rtype: int - :raises: ValueError - """ - if len(s1) != len(s2): - raise ValueError("Undefined for sequences of unequal length") - return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2)) From 3c39fbc18759cb12b558f6df58fe8db3fb51fe2f Mon Sep 17 00:00:00 2001 From: jfnavarro Date: Wed, 1 Mar 2017 15:16:14 +0100 Subject: [PATCH 2/7] Importing the Cython function in a better way --- stpipeline/common/cdistance.pyx | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 stpipeline/common/cdistance.pyx diff --git a/stpipeline/common/cdistance.pyx b/stpipeline/common/cdistance.pyx new file mode 100644 index 0000000..18d8b8f --- /dev/null +++ b/stpipeline/common/cdistance.pyx @@ -0,0 +1,10 @@ +cpdef int hamming_distance(a, b): + cdef char * aa = a + cdef char * bb = b + cdef int k, l, c + c = 0 + l = len(a) + for k from 0 <= k < l: + if aa[k] != bb[k]: + c += 1 + return c \ No newline at end of file From 4f300c902a7e4c4a434f9d39f1d5812336dc350f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fern=C3=A1ndez=20Navarro?= Date: Wed, 1 Mar 2017 16:10:27 +0100 Subject: [PATCH 3/7] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8ceb0e2..a37146a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ before_install: - export TMPDIR=$HOME/tmp install: - - conda create -q -n testenv --yes python=2.7 numpy scipy pandas + - conda create -q -n testenv --yes python=2.7 numpy scipy pandas cython - source activate testenv - python setup.py install From 0669792de25d665f30a00665fd817549bda7207c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fern=C3=A1ndez=20Navarro?= Date: Thu, 2 Mar 2017 18:25:00 +0100 Subject: [PATCH 4/7] Enforce to remove memory used by the dictionary of 
demultiplexed reads --- stpipeline/core/pipeline.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/stpipeline/core/pipeline.py b/stpipeline/core/pipeline.py index 9901077..d3cddb2 100755 --- a/stpipeline/core/pipeline.py +++ b/stpipeline/core/pipeline.py @@ -23,6 +23,7 @@ import bz2 import tempfile import shutil +import gc FILENAMES = {"mapped" : "mapped.bam", "annotated" : "annotated.bam", @@ -687,8 +688,8 @@ def run(self): raise #================================================================= - # STEP: OBTAIN HASH OF DEMULTIPLEXED READS - # Hash demultiplexed reads to obtain a hash of read_name => (barcode,x,y,umi) + # STEP: OBTAIN DICT OF DEMULTIPLEXED READS + # Iterate demultiplexed FASTQ reads to obtain a dict of read_name => (x,y,umi) #================================================================= self.logger.info("Parsing demultiplexed reads {}".format(globaltime.getTimestamp())) hash_reads = hashDemultiplexedReads(FILENAMES["demultiplexed_matched"], @@ -697,7 +698,7 @@ def run(self): self.low_memory) #================================================================ - # STEP: filters mapped reads and add the (Barcode,x,y,umi) as SAM tags + # STEP: filters mapped reads and add the (x,y,umi) as extra SAM tags #================================================================ self.logger.info("Starting processing aligned reads {}".format(globaltime.getTimestamp())) try: @@ -709,8 +710,12 @@ def run(self): except Exception: raise finally: - if self.low_memory: hash_reads.close() - + if self.low_memory: hash_reads.close() + # Enforcing to remove the memory used + hash_reads.clear() + del hash_reads + gc.collect() + #================================================================= # STEP: annotate using htseq-count #================================================================= From 6b6d84045b1add015aee86a631610d0c29413cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fern=C3=A1ndez=20Navarro?= Date: Thu, 2 Mar 
2017 18:37:25 +0100 Subject: [PATCH 5/7] Update .travis.yml --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index a37146a..983f900 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,8 @@ before_install: install: - conda create -q -n testenv --yes python=2.7 numpy scipy pandas cython - source activate testenv + - conda install pysam + - python setup.py build - python setup.py install script: From 2c622c3d9f52470f0e44d7760a00b4ceabdb546e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fern=C3=A1ndez=20Navarro?= Date: Thu, 2 Mar 2017 18:42:57 +0100 Subject: [PATCH 6/7] Update .travis.yml --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 983f900..8aeafc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,8 @@ before_install: install: - conda create -q -n testenv --yes python=2.7 numpy scipy pandas cython - source activate testenv + - conda config --add channels r + - conda config --add channels bioconda - conda install pysam - python setup.py build - python setup.py install From 9eef3c8c19f22a00f8188b3f638ece91004b8347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fern=C3=A1ndez=20Navarro?= Date: Thu, 2 Mar 2017 19:07:55 +0100 Subject: [PATCH 7/7] Enforce to remove memory used by the dictionary of demultiplexed reads --- stpipeline/core/pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stpipeline/core/pipeline.py b/stpipeline/core/pipeline.py index d3cddb2..407fcaf 100755 --- a/stpipeline/core/pipeline.py +++ b/stpipeline/core/pipeline.py @@ -711,9 +711,10 @@ def run(self): raise finally: if self.low_memory: hash_reads.close() - # Enforcing to remove the memory used - hash_reads.clear() - del hash_reads + else: + # Enforcing to remove the memory used + hash_reads.clear() + del hash_reads gc.collect() #=================================================================