Merged
.travis.yml (6 changes: 5 additions & 1 deletion)
@@ -20,8 +20,12 @@ before_install:
 - export TMPDIR=$HOME/tmp

 install:
-- conda create -q -n testenv --yes python=2.7 numpy scipy pandas
+- conda create -q -n testenv --yes python=2.7 numpy scipy pandas cython
 - source activate testenv
+- conda config --add channels r
+- conda config --add channels bioconda
+- conda install pysam
+- python setup.py build
 - python setup.py install

 script:
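Note on the CI changes: conda config --add channels prepends, so after these two commands the channel priority is bioconda > r > defaults, and bioconda is where pysam is published; cython is added to the environment so the new build step can compile the extension. A minimal post-install sanity check, assuming it runs inside the activated testenv (illustrative, not part of the PR):

# Hypothetical sanity check: confirm pysam resolved from bioconda and
# that the Cython extension was compiled and installed by the build step.
import pysam
from stpipeline.common.cdistance import hamming_distance

print(pysam.__version__)
print(hamming_distance("ACGT", "ACGA"))  # expected: 1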
setup.py (4 changes: 3 additions & 1 deletion)
@@ -14,6 +14,7 @@
 import sys
 from setuptools import setup, find_packages
 from stpipeline.version import version_number
+from Cython.Build import cythonize

 # Get the long description from the relevant file
 here = os.path.abspath(os.path.dirname(__file__))
@@ -41,7 +42,8 @@
     author_email = 'jose.fernandez.navarro@scilifelab.se',
     license = 'MIT',
     url = 'https://github.com/SpatialTranscriptomicsResearch/st_pipeline',
-    packages = find_packages(exclude=('tests*', 'utils')),
+    packages = find_packages(exclude=('tests*', 'utils', '*.pyx')),
+    ext_modules = cythonize("stpipeline/common/*.pyx"),
     include_package_data = False,
     package_data = {'': ['RELEASE-VERSION']},
     zip_safe = False,
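Note on setup.py: cythonize() accepts a glob pattern and returns a list of Extension objects, one per matched .pyx file, which setuptools then compiles during python setup.py build. (find_packages filters package names rather than files, so the '*.pyx' exclude entry is effectively a no-op; the extension build is driven entirely by ext_modules.) A rough illustration of what the glob expands to, assuming only the module added by this PR exists:

# Illustration only: expand the same glob setup.py passes to cythonize().
from Cython.Build import cythonize

extensions = cythonize("stpipeline/common/*.pyx")
for ext in extensions:
    print(ext.name)  # with this PR: stpipeline.common.cdistance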
stpipeline/common/cdistance.pyx (10 changes: 10 additions & 0 deletions)
@@ -0,0 +1,10 @@
+cpdef int hamming_distance(a, b):
+    cdef char * aa = a
+    cdef char * bb = b
+    cdef int k, l, c
+    c = 0
+    l = len(a)
+    for k from 0 <= k < l:
+        if aa[k] != bb[k]:
+            c += 1
+    return c
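The cpdef function is callable from Python as well as Cython. It casts both arguments to char * (valid for Python 2 str, which the pipeline targets per the Travis config) and counts mismatching positions in a plain C loop; only len(a) is checked, so callers must pass equal-length strings. A usage sketch with illustrative values:

# Usage sketch, assuming the extension has been built and installed.
from stpipeline.common.cdistance import hamming_distance

# Two equal-length UMIs differing at two positions.
print(hamming_distance("ACGTAC", "ACCTAA"))  # -> 2

# Caller's responsibility: len(a) bytes are read from both buffers,
# so b must be at least as long as a.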
stpipeline/common/clustering.py (1 change: 0 additions & 1 deletion)
@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.cluster.hierarchy import linkage,fcluster
 from collections import defaultdict
-import pyximport; pyximport.install()
 from stpipeline.common.cdistance import hamming_distance
 import random
 from collections import Counter
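With the extension now compiled at install time via cythonize, the pyximport hook (which compiles .pyx files on the fly at first import and therefore requires a C compiler at runtime) is redundant; the unchanged import below it simply binds to the prebuilt module:

# Before this PR: pyximport compiled cdistance.pyx on the fly at import.
# After: the same import resolves to the extension built by setup.py.
from stpipeline.common.cdistance import hamming_distance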
stpipeline/common/distance.py (19 changes: 0 additions & 19 deletions)

This file was deleted.

stpipeline/core/pipeline.py (16 changes: 11 additions & 5 deletions)
@@ -23,6 +23,7 @@
 import bz2
 import tempfile
 import shutil
+import gc

 FILENAMES = {"mapped" : "mapped.bam",
              "annotated" : "annotated.bam",
@@ -687,8 +688,8 @@ def run(self):
             raise

         #=================================================================
-        # STEP: OBTAIN HASH OF DEMULTIPLEXED READS
-        # Hash demultiplexed reads to obtain a hash of read_name => (barcode,x,y,umi)
+        # STEP: OBTAIN DICT OF DEMULTIPLEXED READS
+        # Iterate demultiplexed FASTQ reads to obtain a dict of read_name => (x,y,umi)
         #=================================================================
         self.logger.info("Parsing demultiplexed reads {}".format(globaltime.getTimestamp()))
         hash_reads = hashDemultiplexedReads(FILENAMES["demultiplexed_matched"],
@@ -697,7 +698,7 @@
                                             self.low_memory)

         #================================================================
-        # STEP: filters mapped reads and add the (Barcode,x,y,umi) as SAM tags
+        # STEP: filter mapped reads and add the (x,y,umi) as extra SAM tags
         #================================================================
         self.logger.info("Starting processing aligned reads {}".format(globaltime.getTimestamp()))
         try:
@@ -709,8 +710,13 @@
         except Exception:
             raise
         finally:
-            if self.low_memory: hash_reads.close()
-
+            if self.low_memory: hash_reads.close()
+            else:
+                # Force the release of the memory used by the dict
+                hash_reads.clear()
+                del hash_reads
+                gc.collect()
+
         #=================================================================
         # STEP: annotate using htseq-count
         #=================================================================
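The finally block now distinguishes the two containers hash_reads can be: in low-memory mode it is a disk-backed store that must be closed; otherwise it is an in-memory dict holding one entry per demultiplexed read, which can dominate peak memory. Clearing it, unbinding the name, and running the collector releases that memory before annotation starts. A minimal sketch of the pattern (contents are illustrative):

import gc

# Stand-in for the real mapping of read_name => (x, y, umi).
hash_reads = dict(("read_%d" % i, (0, 0, "ACGTACGT")) for i in range(1000000))

# ... consume hash_reads ...

# clear() drops the entries, del unbinds the name, and gc.collect()
# sweeps any remaining cyclic garbage, so peak memory falls now rather
# than at an unpredictable later collection.
hash_reads.clear()
del hash_reads
gc.collect()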