6 changes: 3 additions & 3 deletions README.md
@@ -1,4 +1,4 @@
<h1 align="center">Welcome to PathFlowAI 👋</h1>
<h1 align="center">Welcome to PathFlowAI </h1>
<p>
<img alt="Version" src="https://img.shields.io/badge/version-0.1-blue.svg?cacheSeconds=2592000" />
<a href="https://jlevy44.github.io/PathFlowAI/">
@@ -10,11 +10,11 @@

### 🏠 [Homepage](https://github.com/jlevy44/PathFlowAI)

MedRxiv Manuscript: https://www.medrxiv.org/content/10.1101/19003897v1
Published in the Proceedings of the Pacific Symposium for Biocomputing 2020, Manuscript: https://psb.stanford.edu/psb-online/proceedings/psb20/Levy.pdf

## Install

First, install [openslide](https://openslide.org/download/).
First, install [openslide](https://openslide.org/download/). Note: you may need to install libiconv and shapely using conda. More installation information will be added; in the meantime, please submit issues.

```sh
pip install pathflowai
```
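
If openslide, libiconv, or shapely are troublesome to build, one possible route is conda (a sketch; availability of these packages on conda-forge for your platform is an assumption):

```sh
# assumption: these packages are published on conda-forge
conda install -c conda-forge openslide libiconv shapely
```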
2 changes: 2 additions & 0 deletions bin/install_apex
@@ -1,5 +1,7 @@
#!/bin/bash

export TMPDIR=$HOME/tmp
mkdir -p $TMPDIR
rm -rf apex
git clone https://github.com/NVIDIA/apex
cd apex
11 changes: 11 additions & 0 deletions bin/install_lightnet
@@ -0,0 +1,11 @@
#!/bin/bash
#python -m lightnet download tiny-yolo
#python -m lightnet download yolo
rm -rf lightnet
git clone https://gitlab.com/EAVISE/lightnet.git
cd lightnet
pip install .
cd ..
rm -rf lightnet
#wget https://pjreddie.com/media/files/yolo.weights
#wget https://pjreddie.com/media/files/tiny-yolo.weights
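
Since the script deletes the cloned source tree after installing, a quick import check confirms the package itself survived (a sketch; assumes the `pip install .` above succeeded):

```sh
python -c "import lightnet; print(lightnet.__file__)"
```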
99 changes: 99 additions & 0 deletions experimental/datasets.py
@@ -0,0 +1,99 @@
#
# Lightnet dataset that works with brambox annotations
# Copyright EAVISE
#
# https://eavise.gitlab.io/lightnet/_modules/lightnet/models/_dataset_brambox.html#BramboxDataset
# https://eavise.gitlab.io/brambox/notes/02-getting_started.html#Loading-data

import os
import copy
import logging
from PIL import Image
import numpy as np
import lightnet.data as lnd
from pathflowai.utils import load_sql_df
import dask.array as da
from os.path import join

try:
    import brambox as bb
except ImportError:
    bb = None

__all__ = ['BramboxPathFlowDataset']
log = logging.getLogger(__name__)

# ADD IMAGE ANNOTATION TRANSFORM
# ADD TRAIN VAL TEST INFO

class BramboxPathFlowDataset(lnd.Dataset):
    """ Dataset for any brambox annotations.

    Args:
        annotations (dataframe): Dataframe containing brambox annotations
        input_dimension (tuple): (width,height) tuple with default dimensions of the network
        class_label_map (list): List of class_labels
        identify (function, optional): Lambda/function to get image based on annotation filename or image id; Default **replace/add .png extension to filename/id**
        img_transform (torchvision.transforms.Compose): Transforms to perform on the images
        anno_transform (torchvision.transforms.Compose): Transforms to perform on the annotations

    Note:
        This dataset opens images with the Pillow library
    """
    def __init__(self, input_dir, patch_info_file, patch_size, annotations, input_dimension, class_label_map=None, identify=None, img_transform=None, anno_transform=None):
        if bb is None:
            raise ImportError('Brambox needs to be installed to use this dataset')
        super().__init__(input_dimension)

        self.annos = annotations
        self.annos['ignore'] = 0
        self.annos['class_label'] = self.annos['class_label'].astype(int)
        print(self.annos['class_label'].unique())
        self.keys = self.annos.image.cat.categories  # stores unique patches
        self.img_tf = img_transform
        self.anno_tf = anno_transform
        self.patch_info = load_sql_df(patch_info_file, patch_size)
        IDs = self.patch_info['ID'].unique()
        # one lazily-sliced dask array per whole-slide zarr store
        self.slides = {slide: da.from_zarr(join(input_dir, '{}.zarr'.format(slide))) for slide in IDs}
        self.id = lambda k: k.split('/')  # image keys are 'ID/x/y/patch_size'
        # experiment: swap width/height to match mask orientation
        self.annos['width'], self.annos['height'] = self.annos['height'], self.annos['width']
        # Add class_ids
        if class_label_map is None:
            log.warning('No class_label_map given, generating it by sorting unique class labels from the data alphabetically; this is not always deterministic behaviour')
            class_label_map = list(np.sort(self.annos.class_label.unique()))
        self.annos['class_id'] = self.annos.class_label.map(dict((l, i) for i, l in enumerate(class_label_map)))

    def __len__(self):
        return len(self.keys)

    @lnd.Dataset.resize_getitem
    def __getitem__(self, index):
        """ Get transformed image and annotations based on the index of ``self.keys``

        Args:
            index (int): index of the ``self.keys`` list containing all the image identifiers of the dataset.

        Returns:
            tuple: (transformed image, list of transformed brambox boxes)
        """
        if index >= len(self):
            raise IndexError(f'list index out of range [{index}/{len(self)-1}]')

        # Load the patch from the slide's zarr array and its matching boxes
        ID, x, y, patch_size = self.id(self.keys[index])
        x, y, patch_size = int(x), int(y), int(patch_size)
        img = self.slides[ID][x:x+patch_size, y:y+patch_size].compute()
        anno = bb.util.select_images(self.annos, [self.keys[index]])

        # Transform
        if self.img_tf is not None:
            img = self.img_tf(img)
        if self.anno_tf is not None:
            anno = self.anno_tf(anno)

        return img, anno
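
A minimal usage sketch for the dataset above. The file names are the argparse defaults used elsewhere in this PR; the annotation pickle and the per-slide `.zarr` arrays are assumed to already exist, and `datasets` refers to this module:

```python
import brambox as bb
from datasets import BramboxPathFlowDataset

# brambox-format dataframe, e.g. as written by get_bounding_boxes_from_seg_point_masks.py
annotations = bb.io.load('pandas', 'annotations_bbox_512.pkl')

dataset = BramboxPathFlowDataset(input_dir='inputs', patch_info_file='cell_info.db',
                                 patch_size=512, annotations=annotations,
                                 input_dimension=(512, 512))
img, anno = dataset[0]  # patch pixels (numpy array) and its brambox boxes
print(len(dataset), img.shape)
```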
23 changes: 23 additions & 0 deletions experimental/get_anchors.py
@@ -0,0 +1,23 @@
from sklearn.cluster import KMeans
import numpy as np, pandas as pd, brambox as bb
import pickle, argparse

p=argparse.ArgumentParser()
p.add_argument('--patch_size',default=512,type=int)
p.add_argument('--n_anchors',default=20,type=int)
p.add_argument('--sample_p',default=1.,type=float)

args=p.parse_args()
np.random.seed(42)
patch_size=args.patch_size
n_anchors=args.n_anchors
sample_p=args.sample_p
annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size)
annotations=bb.io.load('pandas',annotation_file)
if sample_p<1.:
    annotations=annotations.sample(frac=sample_p)

X=annotations[['x_top_left','y_top_left']].astype(float).values+(annotations['width']/2.).astype(float).values.reshape(-1,1)
km=KMeans(n_clusters=n_anchors,n_jobs=-1).fit(X)
anchors=km.cluster_centers_
pickle.dump(anchors,open('anchors.pkl','wb'))
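
The features clustered here are box centers: `width/2` is added to both coordinates, consistent with the square boxes this pipeline emits. A quick look at the saved output (a sketch, assuming the script was run with its defaults):

```python
import pickle

with open('anchors.pkl', 'rb') as f:
    anchors = pickle.load(f)
print(anchors.shape)  # (20, 2): one (x, y) cluster center per anchor
```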
137 changes: 137 additions & 0 deletions experimental/get_bounding_boxes_from_seg_point_masks.py
@@ -0,0 +1,137 @@
import brambox as bb
import os
from os.path import join, basename
from pathflowai.utils import load_sql_df, npy2da
import skimage
import dask, dask.array as da, pandas as pd, numpy as np
import argparse
from scipy import ndimage
from scipy.ndimage.measurements import label
import pickle
from dask.distributed import Client
from multiprocessing import Pool
from functools import reduce

def get_box(l,prop):
    # centroid in (x, y) order; regionprops returns (row, col)
    c=[prop.centroid[1], prop.centroid[0]]
    width = prop.bbox[3] - prop.bbox[1] + 1
    height = prop.bbox[2] - prop.bbox[0] + 1
    wh=max(width,height)  # side of the square box
    return [l]+c+[wh]

def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3):
    lbls,n_lbl=label(m)
    # map each connected-component label to its class in the mask
    obj_labels={}
    for i in range(1,num_classes+1):
        obj_labels[i]=np.unique(lbls[m==i].flatten())
    rev_label={}
    for k in obj_labels:
        for i in obj_labels[k]:
            rev_label[i]=k
    rev_label={k:rev_label[k] for k in sorted(list(rev_label.keys()))}
    objProps = list(skimage.measure.regionprops(lbls))
    boxes=dask.compute(*[dask.delayed(get_box)(rev_label[i],objProps[i-1]) for i in list(rev_label.keys())],scheduler='threading')
    boxes=pd.DataFrame(np.array(boxes).astype(int),columns=['class_label','x_top_left','y_top_left','width'])
    boxes['height']=boxes['width']
    boxes['image']='{}/{}/{}/{}'.format(ID,x,y,patch_size)
    boxes=boxes[['image','class_label','x_top_left','y_top_left','width','height']]
    boxes.loc[:,'x_top_left']=np.clip(boxes.loc[:,'x_top_left'],0,m.shape[1])
    boxes.loc[:,'y_top_left']=np.clip(boxes.loc[:,'y_top_left'],0,m.shape[0])
    return boxes

if __name__=='__main__':
    p=argparse.ArgumentParser()
    p.add_argument('--num_classes',default=4,type=int)
    p.add_argument('--patch_size',default=512,type=int)
    p.add_argument('--n_workers',default=40,type=int)
    p.add_argument('--p_sample',default=0.7,type=float)
    p.add_argument('--input_dir',default='inputs',type=str)
    p.add_argument('--patch_info_file',default='cell_info.db',type=str)
    p.add_argument('--reference_mask',default='reference_mask.npy',type=str)
    # add mode to just use own extracted bounding boxes or from seg, maybe from histomicstk

    args=p.parse_args()
    num_classes=args.num_classes
    n_workers=args.n_workers
    input_dir=args.input_dir
    patch_info_file=args.patch_info_file
    patch_size=args.patch_size
    p_sample=args.p_sample
    np.random.seed(42)
    annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size)
    reference_mask=args.reference_mask

    # derive a fixed per-class box width (mean + 2 std) from a reference mask
    if not os.path.exists('widths.pkl'):
        m=np.load(reference_mask)
        bbox_df=get_boxes(m)
        official_widths=dict(bbox_df.groupby('class_label')['width'].mean()+2*bbox_df.groupby('class_label')['width'].std())
        pickle.dump(official_widths,open('widths.pkl','wb'))
    else:
        official_widths=pickle.load(open('widths.pkl','rb'))

    patch_info=load_sql_df(patch_info_file, patch_size)
    IDs=patch_info['ID'].unique()
    masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(mask))) for mask in IDs}

    if p_sample < 1.:
        patch_info=patch_info.sample(frac=p_sample)

    if not os.path.exists(annotation_file):
        bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']]
    else:
        bbox_df=bb.io.load('pandas',annotation_file)

    # skip patches whose boxes were already extracted on a previous run
    patch_info=patch_info[~np.isin(np.vectorize(lambda i: '/'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist()))(np.arange(patch_info.shape[0])),set(bbox_df.image.cat.categories))]

    print(patch_info.shape[0])

    def get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes):
        bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2, num_classes=num_classes)
        # replace per-object widths with the fixed per-class width, then recenter
        for i in official_widths.keys():
            bbox_dff.loc[bbox_dff['class_label']==i,'width']=int(official_widths[i])
        bbox_dff.loc[:,'x_top_left']=(bbox_dff.loc[:,'x_top_left']-bbox_dff['width']/2.).astype(int)
        bbox_dff.loc[:,'y_top_left']=(bbox_dff.loc[:,'y_top_left']-bbox_dff['width']/2.).astype(int)
        bbox_dff.loc[:,'x_top_left']=np.clip(bbox_dff.loc[:,'x_top_left'],0,m.shape[1])
        bbox_dff.loc[:,'y_top_left']=np.clip(bbox_dff.loc[:,'y_top_left'],0,m.shape[0])
        return bbox_dff

    def process_chunk(patch_info_sub):
        patch_info_sub=patch_info_sub.reset_index(drop=True)
        bbox_dfs=[]
        for i in range(patch_info_sub.shape[0]):
            patch=patch_info_sub.iloc[i]
            ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist()
            m=masks[ID][x:x+patch_size2,y:y+patch_size2]
            bbox_dfs.append(get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes))
        return bbox_dfs

    patch_info_subs=np.array_split(patch_info,n_workers)
    p=Pool(n_workers)
    bbox_dfs=reduce(lambda x,y:x+y,p.map(process_chunk,patch_info_subs))

    bbox_df=pd.concat([bbox_df]+bbox_dfs)
    bbox_df.loc[:,'height']=bbox_df['width']
    bb.io.save(bbox_df,'pandas',annotation_file)
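
A toy sanity check for `get_boxes` on a synthetic mask (a sketch; it assumes the definitions above are importable from this file):

```python
import numpy as np
from get_bounding_boxes_from_seg_point_masks import get_boxes

m = np.zeros((64, 64), dtype=int)
m[10:20, 10:18] = 1   # one class-1 object
m[40:50, 30:42] = 2   # one class-2 object
boxes = get_boxes(m, ID='toy', x=0, y=0, patch_size=64, num_classes=3)
print(boxes)  # two square boxes, one per labeled object, keyed 'toy/0/0/64'
```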
73 changes: 73 additions & 0 deletions experimental/get_counts.py
@@ -0,0 +1,73 @@
import brambox as bb
import os
from os.path import join, basename
from pathflowai.utils import load_sql_df, npy2da, df2sql
import skimage
import dask, dask.array as da, pandas as pd, numpy as np
import argparse
from scipy import ndimage
from scipy.ndimage.measurements import label
import pickle
from dask.distributed import Client
from multiprocessing import Pool
from functools import reduce

def count_cells(m, num_classes=3):
    lbls,n_lbl=label(m)
    obj_labels=np.zeros(num_classes)
    for i in range(1,num_classes+1):
        # number of connected components belonging to class i
        obj_labels[i-1]=len(np.unique(lbls[m==i].flatten()))
    return obj_labels

if __name__=='__main__':
    p=argparse.ArgumentParser()
    p.add_argument('--num_classes',default=4,type=int)
    p.add_argument('--patch_size',default=512,type=int)
    p.add_argument('--n_workers',default=40,type=int)
    p.add_argument('--p_sample',default=0.7,type=float)
    p.add_argument('--input_dir',default='inputs',type=str)
    p.add_argument('--patch_info_file',default='cell_info.db',type=str)
    p.add_argument('--reference_mask',default='reference_mask.npy',type=str)

    args=p.parse_args()
    num_classes=args.num_classes
    n_workers=args.n_workers
    input_dir=args.input_dir
    patch_info_file=args.patch_info_file
    patch_size=args.patch_size
    np.random.seed(42)
    reference_mask=args.reference_mask

    patch_info=load_sql_df(patch_info_file, patch_size)
    IDs=patch_info['ID'].unique()
    masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(mask))) for mask in IDs}

    def process_chunk(patch_info_sub):
        patch_info_sub=patch_info_sub.reset_index(drop=True)
        counts=[]
        for i in range(patch_info_sub.shape[0]):
            patch=patch_info_sub.iloc[i]
            ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist()
            m=masks[ID][x:x+patch_size2,y:y+patch_size2]
            counts.append(dask.delayed(count_cells)(m, num_classes=num_classes))
        return dask.compute(*counts,scheduler='threading')

    patch_info_subs=np.array_split(patch_info,n_workers)
    p=Pool(n_workers)
    counts=reduce(lambda x,y:x+y,p.map(process_chunk,patch_info_subs))

    # one row of per-class counts per patch, appended to the patch metadata
    counts=pd.DataFrame(np.vstack(counts))
    patch_info=pd.concat([patch_info[['ID','x','y','patch_size','annotation']].reset_index(drop=True),counts.reset_index(drop=True)],axis=1).reset_index()
    print(patch_info)

    df2sql(patch_info, 'counts_test.db', patch_size, mode='replace')
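
A toy check of `count_cells` (a sketch; assumes the function above is importable from this file):

```python
import numpy as np
from get_counts import count_cells

m = np.zeros((32, 32), dtype=int)
m[2:6, 2:6] = 1       # first class-1 cell
m[10:14, 10:14] = 1   # second class-1 cell
m[20:25, 20:25] = 2   # one class-2 cell
print(count_cells(m, num_classes=3))  # [2. 1. 0.]
```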