From 4157d94434f17d3de4d3b4328cef04775a51996d Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 00:45:05 -0400 Subject: [PATCH 01/27] Working on adding object detection --- bin/install_yolo | 5 ++ experimental/datasets.py | 88 ++++++++++++++++++++++++ experimental/get_bounding_boxes.py | 74 ++++++++++++++++++++ experimental/object_detection.py | 106 +++++++++++++++++++++++++++++ setup.py | 4 +- 5 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 bin/install_yolo create mode 100644 experimental/datasets.py create mode 100644 experimental/get_bounding_boxes.py create mode 100644 experimental/object_detection.py diff --git a/bin/install_yolo b/bin/install_yolo new file mode 100644 index 0000000..cda6e75 --- /dev/null +++ b/bin/install_yolo @@ -0,0 +1,5 @@ +#!/bin/bash +#python -m lightnet download tiny-yolo +#python -m lightnet download yolo +wget https://pjreddie.com/media/files/yolo.weights +wget https://pjreddie.com/media/files/tiny-yolo.weights diff --git a/experimental/datasets.py b/experimental/datasets.py new file mode 100644 index 0000000..3839c4c --- /dev/null +++ b/experimental/datasets.py @@ -0,0 +1,88 @@ +# +# Lightnet dataset that works with brambox annotations +# Copyright EAVISE +# +# https://eavise.gitlab.io/lightnet/_modules/lightnet/models/_dataset_brambox.html#BramboxDataset +# https://eavise.gitlab.io/brambox/notes/02-getting_started.html#Loading-data + +import os +import copy +import logging +from PIL import Image +import numpy as np +import lightnet.data as lnd +from pathflowai.utils import load_sql_df + +try: + import brambox as bb +except ImportError: + bb = None + +__all__ = ['BramboxDataset'] +log = logging.getLogger(__name__) + +# ADD IMAGE ANNOTATION TRANSFORM +# ADD TRAIN VAL TEST INFO +class BramboxPathFlowDataset(lnd.Dataset): + """ Dataset for any brambox annotations. + + Args: + annotations (dataframe): Dataframe containing brambox annotations + input_dimension (tuple): (width,height) tuple with default dimensions of the network + class_label_map (list): List of class_labels + identify (function, optional): Lambda/function to get image based of annotation filename or image id; Default **replace/add .png extension to filename/id** + img_transform (torchvision.transforms.Compose): Transforms to perform on the images + anno_transform (torchvision.transforms.Compose): Transforms to perform on the annotations + + Note: + This dataset opens images with the Pillow library + """ + def __init__(self, patch_info_file, patch_size, annotations, input_dimension, class_label_map=None, identify=None, img_transform=None, anno_transform=None): + if bb is None: + raise ImportError('Brambox needs to be installed to use this dataset') + super().__init__(input_dimension) + + self.annos = annotations + self.keys = self.annos.image.cat.categories # stores unique patches + self.img_tf = img_transform + self.anno_tf = anno_transform + self.patch_info=load_sql_df(patch_info_file, patch_size) + IDs=self.patch_info['ID'].unique() + self.slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} + self.id = lambda k: self.keys[k].split('/') + + # Add class_ids + if class_label_map is None: + log.warning(f'No class_label_map given, generating it by sorting unique class labels from data alphabetically, which is not always deterministic behaviour') + class_label_map = list(np.sort(self.annos.class_label.unique())) + self.annos['class_id'] = self.annos.class_label.map(dict((l, i) for i, l in enumerate(class_label_map))) + + def __len__(self): + return len(self.keys) + + @lnd.Dataset.resize_getitem + def __getitem__(self, index): + """ Get transformed image and annotations based of the index of ``self.keys`` + + Args: + index (int): index of the ``self.keys`` list containing all the image identifiers of the dataset. + + Returns: + tuple: (transformed image, list of transformed brambox boxes) + """ + if index >= len(self): + raise IndexError(f'list index out of range [{index}/{len(self)-1}]') + + # Load + ID,x,y,patch_size=self.id(self.keys[index]) + x,y,patch_size=int(x),int(y),int(patch_size) + img = self.slides[ID][x:x+patch_size,y:y+patch_size].compute()#Image.open(self.id(self.keys[index])) + anno = bb.util.select_images(self.annos, [self.keys[index]]) + + # Transform + if self.img_tf is not None: + img = self.img_tf(img) + if self.anno_tf is not None: + anno = self.anno_tf(anno) + + return img, anno diff --git a/experimental/get_bounding_boxes.py b/experimental/get_bounding_boxes.py new file mode 100644 index 0000000..64a3c4e --- /dev/null +++ b/experimental/get_bounding_boxes.py @@ -0,0 +1,74 @@ +import brambox as bb +import os +from pathflowai.utils import load_sql_df, npy2da + +def get_box(i,prop): + c=[prop.centroid[1], prop.centroid[0]] + l=rev_label[i+1] + width = prop.bbox[3] - prop.bbox[1] + 1 + height = prop.bbox[2] - prop.bbox[0] + 1 + wh=max(width,height) + c = [ci-wh/2 for ci in c] + return [l]+c+[wh] + +def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): + lbls,n_lbl=label(m) + obj_labels={} + for i in range(1,5): + obj_labels[i]=np.unique(lbls[m==i].flatten()) + rev_label={} + for k in obj_labels: + for i in obj_labels[k]: + rev_label[i]=k + objProps = skimage.measure.regionprops(lbls) + boxes=dask.compute(*[dask.delayed(get_box)(i,prop) for i,prop in enumerate(objProps)],scheduler='threading') + #print(boxes) + boxes=pd.DataFrame(np.array(boxes).astype(int),columns=['class_label','x_top_left','y_top_left','width']) + boxes['height']=boxes['width'] + boxes['image']='{}_{}_{}_{}'.format(ID,x,y,patch_size) + boxes=boxes[['image','class_label','x_top_left','y_top_left','width','height']] + boxes.loc[:,'x_top_left']=np.clip(boxes.loc[:,'x_top_left'],0,m.shape[1]) + boxes.loc[:,'y_top_left']=np.clip(boxes.loc[:,'y_top_left'],0,m.shape[0]) + bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']] + bbox_df=bbox_df.append(boxes) + return boxes + +if __name__=='__main__': + input_dir='inputs' + patch_info_file='cell_info.db' + patch_size=256 + p_sample=0.7 + annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) + reference_mask='reference_mask.npy' + if not os.path.exists('widths.pkl'): + m=np.load(reference_mask) + bbox_df=get_boxes(m) + official_widths=dict(bbox_df.groupby('class_label')['width'].mean()+2*bbox_df.groupby('class_label')['width'].std()) + pickle.dump(official_widths,open('widths.pkl','wb')) + else: + official_widths=pickle.load(open('widths.pkl','rb')) + + patch_info=load_sql_df(patch_info_file, patch_size) + IDs=patch_info['ID'].unique() + #slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} + masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(slide))) for slide in IDs} + + patch_info=patch_info.sample(frac=p_sample) + + bbox_dfs=[] + for i in patch_info.shape[0]: + patch=patch_info.iloc[i] + bbox_dff=get_boxes(masks[patch['ID']],ID=patch['ID'],x=patch['x'],y=patch['y'],patch_size=patch['patch_size']) + + if not os.path.exists(annotation_file): + bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']] + else: + bbox_df=bb.io.load('pandas',annotation_file) + + bbox_df=pd.concat([bbox_df]+bbox_dfs) + + for i in official_widths.index: + bbox_df.loc[bbox_df['class_label']==i,'width']=int(official_widths[i]) + bbox_df.loc[:,'height']=bbox_df['width'] + + bb.io.save(bbox_df,'pandas',annotation_file) diff --git a/experimental/object_detection.py b/experimental/object_detection.py new file mode 100644 index 0000000..cdc1d7a --- /dev/null +++ b/experimental/object_detection.py @@ -0,0 +1,106 @@ +import lightnet as ln +import torch +import numpy as np +import matplotlib.pyplot as plt +import brambox as bb +import dask as da +from datasets import BramboxPathFlowDataset + +# Settings +ln.logger.setConsoleLevel('ERROR') # Only show error log messages +bb.logger.setConsoleLevel('ERROR') +# https://eavise.gitlab.io/lightnet/notes/02-B-engine.html + +num_classes=3 +patch_size=256 +patch_info_file='cell_info.db' +annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) +annotations=bb.io.load('pandas',annotation_file) + +model=ln.models.Yolo(num_classes=num_classes) + +loss = ln.network.loss.RegionLoss( + num_classes=model.num_classes, + anchors=model.anchors, + stride=model.stride +) + +transforms = ln.data.transform.Compose([ln.data.transform.RandomHSV( + hue=1, + saturation=2, + value=2 +)]) + +post = ln.data.transform.Compose([ + ln.data.transform.GetBoundingBoxes( + num_classes=params.network.num_classes, + anchors=params.network.anchors, + conf_thresh=0.5, + ), + + ln.data.transform.NonMaxSuppression( + nms_thresh=0.5 + ), + + ln.data.transform.TensorToBrambox( + network_size=(patch_size,patch_size), + # class_label_map=class_label_map, + ) +]) + +dataset=BramboxPathFlowDataset(patch_info_file, patch_size, annotations, input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) + +class CustomEngine(ln.engine.Engine): + def start(self): + """ Do whatever needs to be done before starting """ + self.params.to(self.device) # Casting parameters to a certain device + self.optim.zero_grad() # Make sure to start with no gradients + self.loss_acc = [] # Loss accumulator + + def process_batch(self, data): + """ Forward and backward pass """ + data, target = data # Unpack + + output = self.network(data) + loss = self.loss(output, target) + loss.backward() + + self.loss_acc.append(loss.item()) + + def train_batch(self): + """ Weight update and logging """ + self.optim.step() + self.optim.zero_grad() + + batch_loss = sum(self.loss_acc) / len(self.loss_acc) + self.loss_acc = [] + self.log(f'Loss: {batch_loss}') + + def quit(self): + if self.batch >= self.max_batches: # Should probably save weights here + print('Reached end of training') + return True + return False + +# Create HyperParameters +params = ln.engine.HyperParameters( + network=model, + mini_batch_size=8, + batch_size=64, + max_batches=128 +) +params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) +params.optim = torch.optim.SGD(params.network.parameters(), lr=0.001) + + +dl = ln.data.DataLoader( + dataset, + batch_size = 2, + collate_fn = ln.data.list_collate # We want the data to be grouped as a list +) + +# Create engine +engine = CustomEngine( + params, dl, # Dataloader (None) is not valid + device=torch.device('gpu') if torch.cuda.is_available() else torch.device('cpu') +) diff --git a/setup.py b/setup.py index 78cb253..cc77854 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ 'networkx', 'shap', 'pyyaml', - 'torch-encoding'] + 'torch-encoding', + 'lightnet', + 'brambox'] with open('README.md','r', encoding='utf-8') as f: long_description = f.read() From 8f714e0bff3d27f514996f7faf4d3ae6adf3b420 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 10:02:42 -0400 Subject: [PATCH 02/27] Working on algorithm for bounding boxes, debug --- experimental/datasets.py | 1 + experimental/get_bounding_boxes.py | 50 +++++++++++++++++++++++------- experimental/object_detection.py | 13 ++++++-- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/experimental/datasets.py b/experimental/datasets.py index 3839c4c..023715f 100644 --- a/experimental/datasets.py +++ b/experimental/datasets.py @@ -12,6 +12,7 @@ import numpy as np import lightnet.data as lnd from pathflowai.utils import load_sql_df +import dask.array as da try: import brambox as bb diff --git a/experimental/get_bounding_boxes.py b/experimental/get_bounding_boxes.py index 64a3c4e..7c3c10c 100644 --- a/experimental/get_bounding_boxes.py +++ b/experimental/get_bounding_boxes.py @@ -1,6 +1,9 @@ import brambox as bb import os from pathflowai.utils import load_sql_df, npy2da +import skimage +import dask, dask.array as da, pandas as pd, numpy as np +import argparse def get_box(i,prop): c=[prop.centroid[1], prop.centroid[0]] @@ -8,7 +11,7 @@ def get_box(i,prop): width = prop.bbox[3] - prop.bbox[1] + 1 height = prop.bbox[2] - prop.bbox[0] + 1 wh=max(width,height) - c = [ci-wh/2 for ci in c] + #c = [ci-wh/2 for ci in c] return [l]+c+[wh] def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): @@ -34,12 +37,21 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): return boxes if __name__=='__main__': - input_dir='inputs' - patch_info_file='cell_info.db' - patch_size=256 - p_sample=0.7 + p=argparse.ArgumentParser() + p.add_argument('--num_classes',default=3,type=int) + p.add_argument('--patch_size',default=256,type=int) + p.add_argument('--p_sample',default=0.7,type=float) + p.add_argument('--input_dir',default='inputs',type=str) + p.add_argument('--patch_info_file',default='cell_info.db',type=str) + p.add_argument('--reference_mask',default='reference_mask.npy',type=str) + + args=p.parse_args() + input_dir=args.input_dir + patch_info_file=args.patch_info_file + patch_size=args.patch_size + p_sample=args.p_sample annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) - reference_mask='reference_mask.npy' + reference_mask=args.reference_mask if not os.path.exists('widths.pkl'): m=np.load(reference_mask) bbox_df=get_boxes(m) @@ -53,22 +65,36 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): #slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(slide))) for slide in IDs} - patch_info=patch_info.sample(frac=p_sample) + if p_sample < 1.: + patch_info=patch_info.sample(frac=p_sample) bbox_dfs=[] - for i in patch_info.shape[0]: - patch=patch_info.iloc[i] - bbox_dff=get_boxes(masks[patch['ID']],ID=patch['ID'],x=patch['x'],y=patch['y'],patch_size=patch['patch_size']) if not os.path.exists(annotation_file): bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']] else: bbox_df=bb.io.load('pandas',annotation_file) + patch_info=patch_info[~np.isin(np.vectorize(lambda i: '_'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist())(patch_info.shape[0]),set(bbox_df.image.cat.categories))] + + for i in patch_info.shape[0]: + print(i) + patch=patch_info.iloc[i] + ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() + m=masks[ID][x:x+patch_size2,y:y+patch_size2] + bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2) + for i in official_widths.index: + bbox_dff.loc[bbox_dff['class_label']==i,'width']=int(official_widths[i]) + bbox_dff.loc[:,'x_top_left']=boxes.loc[:,'x_top_left']-bbox_df['width']/2 + bbox_dff.loc[:,'y_top_left']=boxes.loc[:,'y_top_left']-bbox_df['width']/2 + bbox_dff.loc[:,'x_top_left']=np.clip(bbox_dff.loc[:,'x_top_left'],0,m.shape[1]) + bbox_dff.loc[:,'y_top_left']=np.clip(bbox_dff.loc[:,'y_top_left'],0,m.shape[0]) + bbox_dfs.append(bbox_dff) + bbox_df=pd.concat([bbox_df]+bbox_dfs) - for i in official_widths.index: - bbox_df.loc[bbox_df['class_label']==i,'width']=int(official_widths[i]) + bbox_df.loc[:,'height']=bbox_df['width'] + bb.io.save(bbox_df,'pandas',annotation_file) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index cdc1d7a..38b1ab0 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -5,15 +5,22 @@ import brambox as bb import dask as da from datasets import BramboxPathFlowDataset +import argparse # Settings ln.logger.setConsoleLevel('ERROR') # Only show error log messages bb.logger.setConsoleLevel('ERROR') # https://eavise.gitlab.io/lightnet/notes/02-B-engine.html -num_classes=3 -patch_size=256 -patch_info_file='cell_info.db' +p=argparse.ArgumentParser() +p.add_argument('--num_classes',default=3,type=int) +p.add_argument('--patch_size',default=256,type=int) +p.add_argument('--patch_info_file',default='cell_info.db',type=str) +args=p.parse_args() +num_classes=args.num_classes +patch_size=args.patch_size +patch_info_file=args.patch_info_file + annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) annotations=bb.io.load('pandas',annotation_file) From cbe70bcdba24252b775611e9564fb5f14cea919f Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 13:40:20 -0400 Subject: [PATCH 03/27] Debugging --- bin/install_lightnet | 11 +++ bin/install_yolo | 5 -- experimental/datasets.py | 3 +- ...et_bounding_boxes_from_seg_point_masks.py} | 76 +++++++++++++------ experimental/object_detection.py | 30 ++++---- setup.py | 5 +- 6 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 bin/install_lightnet delete mode 100644 bin/install_yolo rename experimental/{get_bounding_boxes.py => get_bounding_boxes_from_seg_point_masks.py} (58%) diff --git a/bin/install_lightnet b/bin/install_lightnet new file mode 100644 index 0000000..eb0cbb7 --- /dev/null +++ b/bin/install_lightnet @@ -0,0 +1,11 @@ +#!/bin/bash +#python -m lightnet download tiny-yolo +#python -m lightnet download yolo +rm -rf lightnet +git clone https://gitlab.com/EAVISE/lightnet.git +cd lightnet +pip install . +cd .. +rm -rf lightnet +#wget https://pjreddie.com/media/files/yolo.weights +#wget https://pjreddie.com/media/files/tiny-yolo.weights diff --git a/bin/install_yolo b/bin/install_yolo deleted file mode 100644 index cda6e75..0000000 --- a/bin/install_yolo +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -#python -m lightnet download tiny-yolo -#python -m lightnet download yolo -wget https://pjreddie.com/media/files/yolo.weights -wget https://pjreddie.com/media/files/tiny-yolo.weights diff --git a/experimental/datasets.py b/experimental/datasets.py index 023715f..bf0cafb 100644 --- a/experimental/datasets.py +++ b/experimental/datasets.py @@ -13,6 +13,7 @@ import lightnet.data as lnd from pathflowai.utils import load_sql_df import dask.array as da +from os.path import join try: import brambox as bb @@ -38,7 +39,7 @@ class BramboxPathFlowDataset(lnd.Dataset): Note: This dataset opens images with the Pillow library """ - def __init__(self, patch_info_file, patch_size, annotations, input_dimension, class_label_map=None, identify=None, img_transform=None, anno_transform=None): + def __init__(self, input_dir, patch_info_file, patch_size, annotations, input_dimension, class_label_map=None, identify=None, img_transform=None, anno_transform=None): if bb is None: raise ImportError('Brambox needs to be installed to use this dataset') super().__init__(input_dimension) diff --git a/experimental/get_bounding_boxes.py b/experimental/get_bounding_boxes_from_seg_point_masks.py similarity index 58% rename from experimental/get_bounding_boxes.py rename to experimental/get_bounding_boxes_from_seg_point_masks.py index 7c3c10c..934ee8d 100644 --- a/experimental/get_bounding_boxes.py +++ b/experimental/get_bounding_boxes_from_seg_point_masks.py @@ -1,32 +1,42 @@ import brambox as bb import os +from os.path import join, basename from pathflowai.utils import load_sql_df, npy2da import skimage import dask, dask.array as da, pandas as pd, numpy as np import argparse +from scipy import ndimage +from scipy.ndimage.measurements import label +import pickle +from dask.distributed import Client +from multiprocessing import Pool +from functools import reduce -def get_box(i,prop): +def get_box(l,prop): c=[prop.centroid[1], prop.centroid[0]] - l=rev_label[i+1] + # l=rev_label[i+1] width = prop.bbox[3] - prop.bbox[1] + 1 height = prop.bbox[2] - prop.bbox[0] + 1 wh=max(width,height) - #c = [ci-wh/2 for ci in c] + # c = [ci-wh/2 for ci in c] return [l]+c+[wh] -def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): +def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): lbls,n_lbl=label(m) obj_labels={} - for i in range(1,5): + for i in range(1,num_classes+1): obj_labels[i]=np.unique(lbls[m==i].flatten()) rev_label={} for k in obj_labels: for i in obj_labels[k]: rev_label[i]=k - objProps = skimage.measure.regionprops(lbls) - boxes=dask.compute(*[dask.delayed(get_box)(i,prop) for i,prop in enumerate(objProps)],scheduler='threading') + rev_label={k:rev_label[k] for k in sorted(list(rev_label.keys()))} + objProps = list(skimage.measure.regionprops(lbls)) + #print(len(objProps),len(rev_label)) + boxes=dask.compute(*[dask.delayed(get_box)(rev_label[i],objProps[i-1]) for i in list(rev_label.keys())],scheduler='threading') # [get_box(rev_label[i],objProps[i-1]) for i in list(rev_label.keys())]# #print(boxes) boxes=pd.DataFrame(np.array(boxes).astype(int),columns=['class_label','x_top_left','y_top_left','width']) + #boxes['class_label']=m[boxes[['x_top_left','y_top_left']].values.T.tolist()] boxes['height']=boxes['width'] boxes['image']='{}_{}_{}_{}'.format(ID,x,y,patch_size) boxes=boxes[['image','class_label','x_top_left','y_top_left','width','height']] @@ -39,17 +49,22 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): if __name__=='__main__': p=argparse.ArgumentParser() p.add_argument('--num_classes',default=3,type=int) - p.add_argument('--patch_size',default=256,type=int) + p.add_argument('--patch_size',default=512,type=int) + p.add_argument('--n_workers',default=40,type=int) p.add_argument('--p_sample',default=0.7,type=float) p.add_argument('--input_dir',default='inputs',type=str) p.add_argument('--patch_info_file',default='cell_info.db',type=str) p.add_argument('--reference_mask',default='reference_mask.npy',type=str) + #c=Client() + # add mode to just use own extracted boudning boxes or from seg, maybe from histomicstk args=p.parse_args() + n_workers=args.n_workers input_dir=args.input_dir patch_info_file=args.patch_info_file patch_size=args.patch_size p_sample=args.p_sample + np.random.seed(42) annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) reference_mask=args.reference_mask if not os.path.exists('widths.pkl'): @@ -63,33 +78,50 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize'): patch_info=load_sql_df(patch_info_file, patch_size) IDs=patch_info['ID'].unique() #slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} - masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(slide))) for slide in IDs} + masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(mask))) for mask in IDs} if p_sample < 1.: patch_info=patch_info.sample(frac=p_sample) - bbox_dfs=[] - if not os.path.exists(annotation_file): bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']] else: bbox_df=bb.io.load('pandas',annotation_file) - patch_info=patch_info[~np.isin(np.vectorize(lambda i: '_'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist())(patch_info.shape[0]),set(bbox_df.image.cat.categories))] + patch_info=patch_info[~np.isin(np.vectorize(lambda i: '_'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist()))(np.arange(patch_info.shape[0])),set(bbox_df.image.cat.categories))] + + print(patch_info.shape[0]) - for i in patch_info.shape[0]: - print(i) - patch=patch_info.iloc[i] - ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() - m=masks[ID][x:x+patch_size2,y:y+patch_size2] - bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2) - for i in official_widths.index: + def get_boxes_point_seg(m,ID,x,y,patch_size2): + bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2, num_classes=num_classes) + for i in official_widths.keys(): bbox_dff.loc[bbox_dff['class_label']==i,'width']=int(official_widths[i]) - bbox_dff.loc[:,'x_top_left']=boxes.loc[:,'x_top_left']-bbox_df['width']/2 - bbox_dff.loc[:,'y_top_left']=boxes.loc[:,'y_top_left']-bbox_df['width']/2 + bbox_dff.loc[:,'x_top_left']=bbox_dff.loc[:,'x_top_left']-bbox_df['width']/2 + bbox_dff.loc[:,'y_top_left']=bbox_dff.loc[:,'y_top_left']-bbox_df['width']/2 bbox_dff.loc[:,'x_top_left']=np.clip(bbox_dff.loc[:,'x_top_left'],0,m.shape[1]) bbox_dff.loc[:,'y_top_left']=np.clip(bbox_dff.loc[:,'y_top_left'],0,m.shape[0]) - bbox_dfs.append(bbox_dff) + return bbox_dff + + def process_chunk(patch_info_sub): + patch_info_sub=patch_info_sub.reset_index(drop=True) + bbox_dfs=[] + + for i in range(patch_info_sub.shape[0]): + #print(i) + patch=patch_info_sub.iloc[i] + ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() + m=masks[ID][x:x+patch_size2,y:y+patch_size2] + bbox_dff=get_boxes_point_seg(m,ID,x,y,patch_size2)#dask.delayed(get_boxes_point_seg)(m,ID,x,y,patch_size2) + bbox_dfs.append(bbox_dff) + return bbox_dfs + + patch_info_subs=np.array_split(patch_info,n_workers) + + p=Pool(n_workers) + + bbox_dfs=reduce(lambda x,y:x+y,p.map(process_chunk,patch_info_subs)) + + #bbox_dfs=dask.compute(*bbox_dfs,scheduler='processes') bbox_df=pd.concat([bbox_df]+bbox_dfs) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 38b1ab0..b116a8f 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -14,12 +14,15 @@ p=argparse.ArgumentParser() p.add_argument('--num_classes',default=3,type=int) -p.add_argument('--patch_size',default=256,type=int) +p.add_argument('--patch_size',default=512,type=int) p.add_argument('--patch_info_file',default='cell_info.db',type=str) +p.add_argument('--input_dir',default='inputs',type=str) + args=p.parse_args() num_classes=args.num_classes patch_size=args.patch_size patch_info_file=args.patch_info_file +input_dir=args.input_dir annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) annotations=bb.io.load('pandas',annotation_file) @@ -38,6 +41,16 @@ value=2 )]) +# Create HyperParameters +params = ln.engine.HyperParameters( + network=model, + mini_batch_size=8, + batch_size=64, + max_batches=128 +) +params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) +params.optim = torch.optim.SGD(params.network.parameters(), lr=0.001) + post = ln.data.transform.Compose([ ln.data.transform.GetBoundingBoxes( num_classes=params.network.num_classes, @@ -55,7 +68,7 @@ ) ]) -dataset=BramboxPathFlowDataset(patch_info_file, patch_size, annotations, input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) +dataset=BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations, input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) class CustomEngine(ln.engine.Engine): def start(self): @@ -89,17 +102,6 @@ def quit(self): return True return False -# Create HyperParameters -params = ln.engine.HyperParameters( - network=model, - mini_batch_size=8, - batch_size=64, - max_batches=128 -) -params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) -params.optim = torch.optim.SGD(params.network.parameters(), lr=0.001) - - dl = ln.data.DataLoader( dataset, batch_size = 2, @@ -109,5 +111,5 @@ def quit(self): # Create engine engine = CustomEngine( params, dl, # Dataloader (None) is not valid - device=torch.device('gpu') if torch.cuda.is_available() else torch.device('cpu') + device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') ) diff --git a/setup.py b/setup.py index cc77854..b945e11 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ 'shap', 'pyyaml', 'torch-encoding', - 'lightnet', + #'lightnet', 'brambox'] with open('README.md','r', encoding='utf-8') as f: @@ -58,7 +58,8 @@ def run(self): author='Joshua Levy', author_email='joshualevy44@berkeley.edu', license='MIT', - scripts=['bin/install_apex'], + scripts=['bin/install_apex', + 'bin/install_lightnet'], #cmdclass={'install': CustomInstallCommand}, entry_points={ 'console_scripts':['pathflowai-preprocess=pathflowai.cli_preprocessing:preprocessing', From 52281cb29109be102e9011e3b892bdf71f1b96d6 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 13:42:50 -0400 Subject: [PATCH 04/27] Update object_detection.py --- experimental/object_detection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index b116a8f..5e69ab6 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -113,3 +113,5 @@ def quit(self): params, dl, # Dataloader (None) is not valid device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') ) + +engine() From aadb1a29a491f2e9ce410bc88772e8c31ec29414 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 13:46:58 -0400 Subject: [PATCH 05/27] Update object_detection.py --- experimental/object_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 5e69ab6..36c5efc 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -105,7 +105,7 @@ def quit(self): dl = ln.data.DataLoader( dataset, batch_size = 2, - collate_fn = ln.data.list_collate # We want the data to be grouped as a list + collate_fn = ln.data.brambox_collate # We want the data to be grouped as a list ) # Create engine From c53fea97eec6aba90e4274d6c73b937b6ee4d582 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 13:54:07 -0400 Subject: [PATCH 06/27] Fixes --- .../get_bounding_boxes_from_seg_point_masks.py | 11 ++++++----- experimental/object_detection.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/experimental/get_bounding_boxes_from_seg_point_masks.py b/experimental/get_bounding_boxes_from_seg_point_masks.py index 934ee8d..289e715 100644 --- a/experimental/get_bounding_boxes_from_seg_point_masks.py +++ b/experimental/get_bounding_boxes_from_seg_point_masks.py @@ -38,7 +38,7 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): boxes=pd.DataFrame(np.array(boxes).astype(int),columns=['class_label','x_top_left','y_top_left','width']) #boxes['class_label']=m[boxes[['x_top_left','y_top_left']].values.T.tolist()] boxes['height']=boxes['width'] - boxes['image']='{}_{}_{}_{}'.format(ID,x,y,patch_size) + boxes['image']='{}/{}/{}/{}'.format(ID,x,y,patch_size) boxes=boxes[['image','class_label','x_top_left','y_top_left','width','height']] boxes.loc[:,'x_top_left']=np.clip(boxes.loc[:,'x_top_left'],0,m.shape[1]) boxes.loc[:,'y_top_left']=np.clip(boxes.loc[:,'y_top_left'],0,m.shape[0]) @@ -48,7 +48,7 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): if __name__=='__main__': p=argparse.ArgumentParser() - p.add_argument('--num_classes',default=3,type=int) + p.add_argument('--num_classes',default=4,type=int) p.add_argument('--patch_size',default=512,type=int) p.add_argument('--n_workers',default=40,type=int) p.add_argument('--p_sample',default=0.7,type=float) @@ -59,6 +59,7 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): # add mode to just use own extracted boudning boxes or from seg, maybe from histomicstk args=p.parse_args() + num_classes=args.num_classes n_workers=args.n_workers input_dir=args.input_dir patch_info_file=args.patch_info_file @@ -88,11 +89,11 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): else: bbox_df=bb.io.load('pandas',annotation_file) - patch_info=patch_info[~np.isin(np.vectorize(lambda i: '_'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist()))(np.arange(patch_info.shape[0])),set(bbox_df.image.cat.categories))] + patch_info=patch_info[~np.isin(np.vectorize(lambda i: '/'.join(patch_info.iloc[i][['ID','x','y','patch_size']].astype(str).tolist()))(np.arange(patch_info.shape[0])),set(bbox_df.image.cat.categories))] print(patch_info.shape[0]) - def get_boxes_point_seg(m,ID,x,y,patch_size2): + def get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes): bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2, num_classes=num_classes) for i in official_widths.keys(): bbox_dff.loc[bbox_dff['class_label']==i,'width']=int(official_widths[i]) @@ -111,7 +112,7 @@ def process_chunk(patch_info_sub): patch=patch_info_sub.iloc[i] ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() m=masks[ID][x:x+patch_size2,y:y+patch_size2] - bbox_dff=get_boxes_point_seg(m,ID,x,y,patch_size2)#dask.delayed(get_boxes_point_seg)(m,ID,x,y,patch_size2) + bbox_dff=get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes)#dask.delayed(get_boxes_point_seg)(m,ID,x,y,patch_size2) bbox_dfs.append(bbox_dff) return bbox_dfs diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 36c5efc..161a21a 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -13,7 +13,7 @@ # https://eavise.gitlab.io/lightnet/notes/02-B-engine.html p=argparse.ArgumentParser() -p.add_argument('--num_classes',default=3,type=int) +p.add_argument('--num_classes',default=4,type=int) p.add_argument('--patch_size',default=512,type=int) p.add_argument('--patch_info_file',default='cell_info.db',type=str) p.add_argument('--input_dir',default='inputs',type=str) From 5bf880c5c867ea358e94945ddb1ab53575cdb5f4 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 14:13:55 -0400 Subject: [PATCH 07/27] Debugging neural net --- experimental/datasets.py | 5 ++++- experimental/object_detection.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/experimental/datasets.py b/experimental/datasets.py index bf0cafb..69530cb 100644 --- a/experimental/datasets.py +++ b/experimental/datasets.py @@ -45,13 +45,15 @@ def __init__(self, input_dir, patch_info_file, patch_size, annotations, input_di super().__init__(input_dimension) self.annos = annotations + #print(self.annos.shape) self.keys = self.annos.image.cat.categories # stores unique patches + #print(self.keys) self.img_tf = img_transform self.anno_tf = anno_transform self.patch_info=load_sql_df(patch_info_file, patch_size) IDs=self.patch_info['ID'].unique() self.slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} - self.id = lambda k: self.keys[k].split('/') + self.id = lambda k: k.split('/') # Add class_ids if class_label_map is None: @@ -76,6 +78,7 @@ def __getitem__(self, index): raise IndexError(f'list index out of range [{index}/{len(self)-1}]') # Load + #print(self.keys[index]) ID,x,y,patch_size=self.id(self.keys[index]) x,y,patch_size=int(x),int(y),int(patch_size) img = self.slides[ID][x:x+patch_size,y:y+patch_size].compute()#Image.open(self.id(self.keys[index])) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 161a21a..e58a9dd 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -80,6 +80,7 @@ def start(self): def process_batch(self, data): """ Forward and backward pass """ data, target = data # Unpack + data=data.transpose((3,1,2)) output = self.network(data) loss = self.loss(output, target) From 7fb7e95f1486c62d2d75c8f054293a670103e8dd Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 25 Sep 2019 15:33:28 -0400 Subject: [PATCH 08/27] Anchors added --- experimental/datasets.py | 3 +++ experimental/get_anchors.py | 23 +++++++++++++++++++ ...get_bounding_boxes_from_seg_point_masks.py | 8 +++++-- experimental/object_detection.py | 18 +++++++++++---- 4 files changed, 45 insertions(+), 7 deletions(-) create mode 100644 experimental/get_anchors.py diff --git a/experimental/datasets.py b/experimental/datasets.py index 69530cb..9185684 100644 --- a/experimental/datasets.py +++ b/experimental/datasets.py @@ -45,6 +45,9 @@ def __init__(self, input_dir, patch_info_file, patch_size, annotations, input_di super().__init__(input_dimension) self.annos = annotations + self.annos['ignore']=0 + self.annos['class_label']=self.annos['class_label'].astype(int)#-1 + print(self.annos['class_label'].unique()) #print(self.annos.shape) self.keys = self.annos.image.cat.categories # stores unique patches #print(self.keys) diff --git a/experimental/get_anchors.py b/experimental/get_anchors.py new file mode 100644 index 0000000..d98004f --- /dev/null +++ b/experimental/get_anchors.py @@ -0,0 +1,23 @@ +from sklearn.cluster import KMeans +import numpy as np, pandas as pd, brambox as bb +import pickle, argparse + +p=argparse.ArgumentParser() +p.add_argument('--patch_size',default=512,type=int) +p.add_argument('--n_anchors',default=20,type=int) +p.add_argument('--sample_p',default=1.,type=float) + +args=p.parse_args() +np.random.seed(42) +patch_size=args.patch_size +n_anchors=args.n_anchors +sample_p=args.sample_p +annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) +annotations=bb.io.load('pandas',annotation_file) +if sample_p<1.: + annotations=annotations.sample(frac=sample_p) + +X=annotations[['x_top_left','y_top_left']].astype(float).values+(annotations['width']/2.).astype(float).values.reshape(-1,1) +km=KMeans(n_clusters=n_anchors,n_jobs=-1).fit(X) +anchors=km.cluster_centers_ +pickle.dump(anchors,open('anchors.pkl','wb')) diff --git a/experimental/get_bounding_boxes_from_seg_point_masks.py b/experimental/get_bounding_boxes_from_seg_point_masks.py index 289e715..fb0252f 100644 --- a/experimental/get_bounding_boxes_from_seg_point_masks.py +++ b/experimental/get_bounding_boxes_from_seg_point_masks.py @@ -36,14 +36,17 @@ def get_boxes(m,ID='test',x='x',y='y',patch_size='patchsize', num_classes=3): boxes=dask.compute(*[dask.delayed(get_box)(rev_label[i],objProps[i-1]) for i in list(rev_label.keys())],scheduler='threading') # [get_box(rev_label[i],objProps[i-1]) for i in list(rev_label.keys())]# #print(boxes) boxes=pd.DataFrame(np.array(boxes).astype(int),columns=['class_label','x_top_left','y_top_left','width']) + #boxes['class_label']=m[boxes[['x_top_left','y_top_left']].values.T.tolist()] boxes['height']=boxes['width'] boxes['image']='{}/{}/{}/{}'.format(ID,x,y,patch_size) boxes=boxes[['image','class_label','x_top_left','y_top_left','width','height']] boxes.loc[:,'x_top_left']=np.clip(boxes.loc[:,'x_top_left'],0,m.shape[1]) boxes.loc[:,'y_top_left']=np.clip(boxes.loc[:,'y_top_left'],0,m.shape[0]) + bbox_df=bb.util.new('annotation').drop(columns=['difficult','ignore','lost','occluded','truncated'])[['image','class_label','x_top_left','y_top_left','width','height']] bbox_df=bbox_df.append(boxes) + #print(boxes) return boxes if __name__=='__main__': @@ -97,8 +100,8 @@ def get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes): bbox_dff=get_boxes(m,ID=ID,x=x,y=y,patch_size=patch_size2, num_classes=num_classes) for i in official_widths.keys(): bbox_dff.loc[bbox_dff['class_label']==i,'width']=int(official_widths[i]) - bbox_dff.loc[:,'x_top_left']=bbox_dff.loc[:,'x_top_left']-bbox_df['width']/2 - bbox_dff.loc[:,'y_top_left']=bbox_dff.loc[:,'y_top_left']-bbox_df['width']/2 + bbox_dff.loc[:,'x_top_left']=(bbox_dff.loc[:,'x_top_left']-bbox_dff['width']/2.).astype(int) + bbox_dff.loc[:,'y_top_left']=(bbox_dff.loc[:,'y_top_left']-bbox_dff['width']/2.).astype(int) bbox_dff.loc[:,'x_top_left']=np.clip(bbox_dff.loc[:,'x_top_left'],0,m.shape[1]) bbox_dff.loc[:,'y_top_left']=np.clip(bbox_dff.loc[:,'y_top_left'],0,m.shape[0]) return bbox_dff @@ -113,6 +116,7 @@ def process_chunk(patch_info_sub): ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() m=masks[ID][x:x+patch_size2,y:y+patch_size2] bbox_dff=get_boxes_point_seg(m,ID,x,y,patch_size2,num_classes)#dask.delayed(get_boxes_point_seg)(m,ID,x,y,patch_size2) + #print(bbox_dff) bbox_dfs.append(bbox_dff) return bbox_dfs diff --git a/experimental/object_detection.py b/experimental/object_detection.py index e58a9dd..7906806 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -5,7 +5,7 @@ import brambox as bb import dask as da from datasets import BramboxPathFlowDataset -import argparse +import argparse, pickle # Settings ln.logger.setConsoleLevel('ERROR') # Only show error log messages @@ -19,15 +19,16 @@ p.add_argument('--input_dir',default='inputs',type=str) args=p.parse_args() -num_classes=args.num_classes +num_classes=args.num_classes+1 patch_size=args.patch_size patch_info_file=args.patch_info_file input_dir=args.input_dir +anchors=pickle.load(open('anchors.pkl','rb')) annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) annotations=bb.io.load('pandas',annotation_file) -model=ln.models.Yolo(num_classes=num_classes) +model=ln.models.Yolo(num_classes=num_classes,anchors=anchors.tolist()) loss = ln.network.loss.RegionLoss( num_classes=model.num_classes, @@ -49,7 +50,7 @@ max_batches=128 ) params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) -params.optim = torch.optim.SGD(params.network.parameters(), lr=0.001) +params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-5) post = ln.data.transform.Compose([ ln.data.transform.GetBoundingBoxes( @@ -80,10 +81,17 @@ def start(self): def process_batch(self, data): """ Forward and backward pass """ data, target = data # Unpack - data=data.transpose((3,1,2)) + #print(target) + data=data.permute(0,3,1,2).float() + if torch.cuda.is_available(): + data=data.cuda() + + #print(data) output = self.network(data) + #print(output) loss = self.loss(output, target) + print(loss) loss.backward() self.loss_acc.append(loss.item()) From c2577f07c28bc4688ca842e25805ea49d1468d8f Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Fri, 27 Sep 2019 15:04:59 -0400 Subject: [PATCH 09/27] Update object_detection.py --- experimental/object_detection.py | 65 ++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 7906806..30bca40 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -1,11 +1,12 @@ import lightnet as ln import torch -import numpy as np +import numpy as np, pandas as pd import matplotlib.pyplot as plt import brambox as bb import dask as da from datasets import BramboxPathFlowDataset import argparse, pickle +from sklearn import train_test_split # Settings ln.logger.setConsoleLevel('ERROR') # Only show error log messages @@ -17,17 +18,27 @@ p.add_argument('--patch_size',default=512,type=int) p.add_argument('--patch_info_file',default='cell_info.db',type=str) p.add_argument('--input_dir',default='inputs',type=str) +p.add_argument('--sample_p',default=1.,type=float) args=p.parse_args() +np.random.seed(42) num_classes=args.num_classes+1 patch_size=args.patch_size patch_info_file=args.patch_info_file input_dir=args.input_dir +sample_p=args.sample_p anchors=pickle.load(open('anchors.pkl','rb')) annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) annotations=bb.io.load('pandas',annotation_file) +if sample_p < 1.: + annotations=annotations.sample(frac=sample_p) + +annotations_dict={} +annotations_dict['train'],annotations_dict['test']=train_test_split(annotations) +annotations_dict['train'],annotations_dict['val']=train_test_split(annotations_dict['train']) + model=ln.models.Yolo(num_classes=num_classes,anchors=anchors.tolist()) loss = ln.network.loss.RegionLoss( @@ -49,8 +60,6 @@ batch_size=64, max_batches=128 ) -params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) -params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-5) post = ln.data.transform.Compose([ ln.data.transform.GetBoundingBoxes( @@ -69,7 +78,18 @@ ) ]) -dataset=BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations, input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) +datasets={BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations_dict[k], input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) for k in ['train','val','test']} + +params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) +params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-5) + +dls = {k:ln.data.DataLoader( + datasets[k], + batch_size = 64, + collate_fn = ln.data.brambox_collate # We want the data to be grouped as a list +) for k in ['train','val','test']} + +params.val_loader=dls['val'] class CustomEngine(ln.engine.Engine): def start(self): @@ -91,11 +111,35 @@ def process_batch(self, data): output = self.network(data) #print(output) loss = self.loss(output, target) - print(loss) + #print(loss) loss.backward() self.loss_acc.append(loss.item()) + @ln.engine.Engine.batch_end(100) # how to pass in validation dataloader + def val_loop(self): + with torch.no_grad(): + for i,data in enumerate(self.val_loader): + if i > 100: + break + data, target = data + output = self.network(data) + loss = self.loss(output, target) + bbox=post(output) + if not i: + bbox_final=[bbox] + else: + bbox_final.append(bbox) + + detections=pd.concat(bbox_final) + pr=bb.stat.pr(det, annotations_dict['val'], threshold=0.5) + auc=bb.stat.auc(pr) + print('VAL AUC={}'.format(auc)) + + @ln.engine.Engine.batch_end(3000) + def save_model(self): + self.params.save(f'backup-{self.batch}.state.pt') + def train_batch(self): """ Weight update and logging """ self.optim.step() @@ -111,16 +155,13 @@ def quit(self): return True return False -dl = ln.data.DataLoader( - dataset, - batch_size = 2, - collate_fn = ln.data.brambox_collate # We want the data to be grouped as a list -) + # Create engine engine = CustomEngine( - params, dl, # Dataloader (None) is not valid + params, dls['train'], # Dataloader (None) is not valid device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') ) -engine() +for i in range(10): + engine() From fe8e953d3591cfa0c8e67e09bb5c30f4cffcc89d Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Fri, 27 Sep 2019 15:07:54 -0400 Subject: [PATCH 10/27] Update object_detection.py --- experimental/object_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 30bca40..8f9f138 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -6,7 +6,7 @@ import dask as da from datasets import BramboxPathFlowDataset import argparse, pickle -from sklearn import train_test_split +from sklearn.model_selection import train_test_split # Settings ln.logger.setConsoleLevel('ERROR') # Only show error log messages From 63202e10de16d256e27d76833c5b4a3b565a4f11 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Fri, 27 Sep 2019 15:32:54 -0400 Subject: [PATCH 11/27] Update object_detection.py --- experimental/object_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index 8f9f138..c5bd0ad 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -78,7 +78,7 @@ ) ]) -datasets={BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations_dict[k], input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) for k in ['train','val','test']} +datasets={k:BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations_dict[k], input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) for k in ['train','val','test']} params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-5) @@ -87,7 +87,7 @@ datasets[k], batch_size = 64, collate_fn = ln.data.brambox_collate # We want the data to be grouped as a list -) for k in ['train','val','test']} + ) for k in ['train','val','test']} params.val_loader=dls['val'] From 60a1ab384d1f15cc13604212a6c258a5bcc5b7b5 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Fri, 27 Sep 2019 17:07:26 -0400 Subject: [PATCH 12/27] Update object_detection.py --- experimental/object_detection.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index c5bd0ad..bb0af20 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -123,6 +123,9 @@ def val_loop(self): if i > 100: break data, target = data + data=data.permute(0,3,1,2).float() + if torch.cuda.is_available(): + data=data.cuda() output = self.network(data) loss = self.loss(output, target) bbox=post(output) From d334208b1c8b9a089b5942add6ac26dda378e437 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 30 Sep 2019 14:05:20 -0400 Subject: [PATCH 13/27] Update object_detection.py --- experimental/object_detection.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/experimental/object_detection.py b/experimental/object_detection.py index bb0af20..d74a205 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -57,7 +57,7 @@ params = ln.engine.HyperParameters( network=model, mini_batch_size=8, - batch_size=64, + batch_size=16, max_batches=128 ) @@ -135,11 +135,13 @@ def val_loop(self): bbox_final.append(bbox) detections=pd.concat(bbox_final) - pr=bb.stat.pr(det, annotations_dict['val'], threshold=0.5) + print(detections) + print(annotations_dict['val']) + pr=bb.stat.pr(detections, annotations_dict['val'], threshold=0.5) auc=bb.stat.auc(pr) print('VAL AUC={}'.format(auc)) - @ln.engine.Engine.batch_end(3000) + @ln.engine.Engine.batch_end(300) def save_model(self): self.params.save(f'backup-{self.batch}.state.pt') From 36fce8849f98be2fc6359e217c07bc7e85caa62c Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Sat, 26 Oct 2019 03:11:11 -0400 Subject: [PATCH 14/27] new installs --- README.md | 2 +- experimental/datasets.py | 5 ++++- experimental/object_detection.py | 35 ++++++++++++++++++++++++-------- pathflowai/datasets.py | 2 +- setup.py | 6 +++++- 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 209f2a8..51fc6c5 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ MedRxiv Manuscript: https://www.medrxiv.org/content/10.1101/19003897v1 ## Install -First, install [openslide](https://openslide.org/download/). +First, install [openslide](https://openslide.org/download/). Note: may need to install libiconv and shapely using conda. Will update with more installation information, please submit issues as well. ```sh pip install pathflowai diff --git a/experimental/datasets.py b/experimental/datasets.py index 9185684..4e8e98b 100644 --- a/experimental/datasets.py +++ b/experimental/datasets.py @@ -25,6 +25,7 @@ # ADD IMAGE ANNOTATION TRANSFORM # ADD TRAIN VAL TEST INFO + class BramboxPathFlowDataset(lnd.Dataset): """ Dataset for any brambox annotations. @@ -57,7 +58,9 @@ def __init__(self, input_dir, patch_info_file, patch_size, annotations, input_di IDs=self.patch_info['ID'].unique() self.slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} self.id = lambda k: k.split('/') - + # experiment + #self.annos['x_top_left'], self.annos['y_top_left']=self.annos['y_top_left'], self.annos['x_top_left'] + self.annos['width'], self.annos['height']=self.annos['height'], self.annos['width'] # Add class_ids if class_label_map is None: log.warning(f'No class_label_map given, generating it by sorting unique class labels from data alphabetically, which is not always deterministic behaviour') diff --git a/experimental/object_detection.py b/experimental/object_detection.py index d74a205..34eba09 100644 --- a/experimental/object_detection.py +++ b/experimental/object_detection.py @@ -19,14 +19,20 @@ p.add_argument('--patch_info_file',default='cell_info.db',type=str) p.add_argument('--input_dir',default='inputs',type=str) p.add_argument('--sample_p',default=1.,type=float) +p.add_argument('--conf_thresh',default=0.01,type=float) +p.add_argument('--nms_thresh',default=0.5,type=float) + args=p.parse_args() np.random.seed(42) num_classes=args.num_classes+1 patch_size=args.patch_size +batch_size=64 patch_info_file=args.patch_info_file input_dir=args.input_dir sample_p=args.sample_p +conf_thresh=args.conf_thresh +nms_thresh=args.nms_thresh anchors=pickle.load(open('anchors.pkl','rb')) annotation_file = 'annotations_bbox_{}.pkl'.format(patch_size) @@ -56,20 +62,21 @@ # Create HyperParameters params = ln.engine.HyperParameters( network=model, - mini_batch_size=8, - batch_size=16, - max_batches=128 + input_dimension = (patch_size,patch_size), + mini_batch_size=16, + batch_size=batch_size, + max_batches=80000 ) post = ln.data.transform.Compose([ ln.data.transform.GetBoundingBoxes( num_classes=params.network.num_classes, anchors=params.network.anchors, - conf_thresh=0.5, + conf_thresh=conf_thresh, ), ln.data.transform.NonMaxSuppression( - nms_thresh=0.5 + nms_thresh=nms_thresh ), ln.data.transform.TensorToBrambox( @@ -78,14 +85,19 @@ ) ]) -datasets={k:BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations_dict[k], input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=transforms, anno_transform=None) for k in ['train','val','test']} +datasets={k:BramboxPathFlowDataset(input_dir,patch_info_file, patch_size, annotations_dict[k], input_dimension=(patch_size,patch_size), class_label_map=None, identify=None, img_transform=None, anno_transform=None) for k in ['train','val','test']} +# transforms params.loss = ln.network.loss.RegionLoss(params.network.num_classes, params.network.anchors) -params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-5) +params.optim = torch.optim.SGD(params.network.parameters(), lr=1e-4) +params.scheduler = ln.engine.SchedulerCompositor( + # batch scheduler + (0, torch.optim.lr_scheduler.CosineAnnealingLR(params.optim,T_max=200)) + ) dls = {k:ln.data.DataLoader( datasets[k], - batch_size = 64, + batch_size = batch_size, collate_fn = ln.data.brambox_collate # We want the data to be grouped as a list ) for k in ['train','val','test']} @@ -110,9 +122,13 @@ def process_batch(self, data): output = self.network(data) #print(output) + loss = self.loss(output, target) + #print(loss) loss.backward() + bbox=post(output) + print(bbox) self.loss_acc.append(loss.item()) @@ -127,8 +143,11 @@ def val_loop(self): if torch.cuda.is_available(): data=data.cuda() output = self.network(data) + #print(output) loss = self.loss(output, target) + print(loss) bbox=post(output) + print(bbox) if not i: bbox_final=[bbox] else: diff --git a/pathflowai/datasets.py b/pathflowai/datasets.py index fcf6dd7..24f609a 100644 --- a/pathflowai/datasets.py +++ b/pathflowai/datasets.py @@ -528,7 +528,7 @@ def update_dataset(self, input_dir, new_db, prediction_basename=[]): self.segmentation_maps = {slide:npy2da(join(self.input_dir,'{}_mask.npy'.format(slide))) for slide in IDs} self.length = self.patch_info.shape[0] - @pysnooper.snoop('get_item.log') + #@pysnooper.snoop('get_item.log') def __getitem__(self, i): patch_info = self.patch_info.iloc[i] ID = patch_info['ID'] diff --git a/setup.py b/setup.py index b945e11..a4006e7 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,11 @@ 'pyyaml', 'torch-encoding', #'lightnet', - 'brambox'] + 'brambox', + 'blosc', + 'numcodecs', + 'zarr', + ] with open('README.md','r', encoding='utf-8') as f: long_description = f.read() From 575d6834109fb9608d296b01e2132379345dd541 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Sun, 3 Nov 2019 02:26:41 -0500 Subject: [PATCH 15/27] New functionality --- bin/install_apex | 2 + experimental/get_counts.py | 73 ++++++++++++++++++++++++++++++++++++ pathflowai/datasets.py | 2 + pathflowai/model_training.py | 14 ++++--- pathflowai/models.py | 26 +++++++++---- pathflowai/utils.py | 9 +++-- 6 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 experimental/get_counts.py diff --git a/bin/install_apex b/bin/install_apex index af171c1..3dc2635 100644 --- a/bin/install_apex +++ b/bin/install_apex @@ -1,5 +1,7 @@ #!/bin/bash +export TMPDIR=$HOME/tmp +mkdir -p $TMPDIR rm -rf apex git clone https://github.com/NVIDIA/apex cd apex diff --git a/experimental/get_counts.py b/experimental/get_counts.py new file mode 100644 index 0000000..d242324 --- /dev/null +++ b/experimental/get_counts.py @@ -0,0 +1,73 @@ +import brambox as bb +import os +from os.path import join, basename +from pathflowai.utils import load_sql_df, npy2da, df2sql +import skimage +import dask, dask.array as da, pandas as pd, numpy as np +import argparse +from scipy import ndimage +from scipy.ndimage.measurements import label +import pickle +from dask.distributed import Client +from multiprocessing import Pool +from functools import reduce + +def count_cells(m, num_classes=3): + lbls,n_lbl=label(m) + obj_labels=np.zeros(num_classes) + for i in range(1,num_classes+1): + obj_labels[i-1]=len(np.unique(lbls[m==i].flatten())) + return obj_labels + +if __name__=='__main__': + p=argparse.ArgumentParser() + p.add_argument('--num_classes',default=4,type=int) + p.add_argument('--patch_size',default=512,type=int) + p.add_argument('--n_workers',default=40,type=int) + p.add_argument('--p_sample',default=0.7,type=float) + p.add_argument('--input_dir',default='inputs',type=str) + p.add_argument('--patch_info_file',default='cell_info.db',type=str) + p.add_argument('--reference_mask',default='reference_mask.npy',type=str) + #c=Client() + # add mode to just use own extracted boudning boxes or from seg, maybe from histomicstk + + args=p.parse_args() + num_classes=args.num_classes + n_workers=args.n_workers + input_dir=args.input_dir + patch_info_file=args.patch_info_file + patch_size=args.patch_size + np.random.seed(42) + reference_mask=args.reference_mask + + patch_info=load_sql_df(patch_info_file, patch_size) + IDs=patch_info['ID'].unique() + #slides = {slide:da.from_zarr(join(input_dir,'{}.zarr'.format(slide))) for slide in IDs} + masks = {mask:npy2da(join(input_dir,'{}_mask.npy'.format(mask))) for mask in IDs} + + def process_chunk(patch_info_sub): + patch_info_sub=patch_info_sub.reset_index(drop=True) + counts=[] + for i in range(patch_info_sub.shape[0]): + #print(i) + patch=patch_info_sub.iloc[i] + ID,x,y,patch_size2=patch[['ID','x','y','patch_size']].tolist() + m=masks[ID][x:x+patch_size2,y:y+patch_size2] + counts.append(dask.delayed(count_cells)(m, num_classes=num_classes)) + + return dask.compute(*counts,scheduler='threading') + + patch_info_subs=np.array_split(patch_info,n_workers) + + p=Pool(n_workers) + + counts=reduce(lambda x,y:x+y,p.map(process_chunk,patch_info_subs)) + + #bbox_dfs=dask.compute(*bbox_dfs,scheduler='processes') + + counts=pd.DataFrame(np.vstack(counts)) + + patch_info=pd.concat([patch_info[['ID','x','y','patch_size','annotation']].reset_index(drop=True),counts.reset_index(drop=True)],axis=1).reset_index() + print(patch_info) + + df2sql(patch_info, 'counts_test.db', patch_size, mode='replace') diff --git a/pathflowai/datasets.py b/pathflowai/datasets.py index 24f609a..9be82ee 100644 --- a/pathflowai/datasets.py +++ b/pathflowai/datasets.py @@ -372,6 +372,8 @@ def __init__(self,dataset_df, set, patch_info_file, transformers, input_dir, tar self.classify_annotations=classify_annotations print(self.targets) self.dilation_jitter=DilationJitter(dilation_jitter,self.segmentation,(original_set=='train')) + if not self.targets: + self.targets = [pos_annotation_class]+list(other_annotations) def concat(self, other_dataset): """Concatenate this dataset with others. Updates its own internal attributes. diff --git a/pathflowai/model_training.py b/pathflowai/model_training.py index 9405b02..5f64c7b 100644 --- a/pathflowai/model_training.py +++ b/pathflowai/model_training.py @@ -124,7 +124,8 @@ def train_model_(training_opts): eta_min=training_opts['eta_min'], T_mult=training_opts['T_mult']), loss_fn=training_opts['loss_fn'], - num_train_batches=num_train_batches) + num_train_batches=num_train_batches, + seg_out_class=training_opts['seg_out_class']) if not training_opts['predict']: @@ -164,7 +165,7 @@ def train_model_(training_opts): exit() y_pred = trainer.predict(dataloader) print(ID,y_pred.shape) - segmentation_predictions2npy(y_pred, dataset.patch_info, dataset.segmentation_maps[ID], npy_output='{}/{}_predict.npy'.format(training_opts['prediction_output_dir'],ID), original_patch_size=training_opts['patch_size'], resized_patch_size=training_opts['patch_resize']) + segmentation_predictions2npy(y_pred, dataset.patch_info, dataset.segmentation_maps[ID], npy_output='{}/{}_predict.npy'.format(training_opts['prediction_output_dir'],ID), original_patch_size=training_opts['patch_size'], resized_patch_size=training_opts['patch_resize'], output_probs=(training_opts['seg_out_class']>=0)) else: extract_embedding=training_opts['extract_embedding'] if extract_embedding: @@ -238,7 +239,9 @@ def train_model_(training_opts): @click.option('-cw', '--custom_weights', default='', help='Comma delimited custom weights', type=click.Path(exists=False), show_default=True) @click.option('-pset', '--prediction_set', default='test', help='Dataset to predict on.', type=click.Choice(['train','val','test']), show_default=True) @click.option('-ut', '--user_transforms_file', default='', help='YAML file to add transforms from.', type=click.Path(exists=False), show_default=True) -def train_model(segmentation,prediction,pos_annotation_class,other_annotations,save_location,pretrained_save_location,input_dir,patch_size,patch_resize,target_names,dataset_df,fix_names, architecture, imbalanced_correction, imbalanced_correction2, classify_annotations, num_targets, subsample_p,subsample_p_val,num_training_images_epoch, learning_rate, transform_platform, n_epoch, patch_info_file, target_segmentation_class, target_threshold, oversampling_factor, supplement, batch_size, run_test, mt_bce, prediction_output_dir, extract_embedding, extract_model, binary_threshold, pretrain, overwrite_loss_fn, adopt_training_loss, external_test_db,external_test_dir, prediction_basename, custom_weights, prediction_set, user_transforms_file): +@click.option('-svp', '--save_val_predictions', is_flag=True, help='Whether to save the validation predictions.', show_default=True) +@click.option('-soc', '--seg_out_class', default=-1, help='Output a particular segmentation class probabilities.', show_default=True) +def train_model(segmentation,prediction,pos_annotation_class,other_annotations,save_location,pretrained_save_location,input_dir,patch_size,patch_resize,target_names,dataset_df,fix_names, architecture, imbalanced_correction, imbalanced_correction2, classify_annotations, num_targets, subsample_p,subsample_p_val,num_training_images_epoch, learning_rate, transform_platform, n_epoch, patch_info_file, target_segmentation_class, target_threshold, oversampling_factor, supplement, batch_size, run_test, mt_bce, prediction_output_dir, extract_embedding, extract_model, binary_threshold, pretrain, overwrite_loss_fn, adopt_training_loss, external_test_db,external_test_dir, prediction_basename, custom_weights, prediction_set, user_transforms_file, save_val_predictions, seg_out_class): """Train and predict using model for regression and classification tasks.""" # add separate pretrain ability on separating cell types, then transfer learn # add pretrain and efficient net, pretraining remove last layer while loading state dict @@ -296,11 +299,12 @@ def train_model(segmentation,prediction,pos_annotation_class,other_annotations,s external_test_db=external_test_db, external_test_dir=external_test_dir, prediction_basename=prediction_basename, - save_val_predictions=True, + save_val_predictions=save_val_predictions, custom_weights=custom_weights, prediction_set=prediction_set, user_transforms=dict(), - dilation_jitter=dict()) + dilation_jitter=dict(), + seg_out_class=seg_out_class) training_opts = dict(normalization_file="normalization_parameters.pkl", loss_fn='bce', diff --git a/pathflowai/models.py b/pathflowai/models.py index 6ac138c..cf0528c 100644 --- a/pathflowai/models.py +++ b/pathflowai/models.py @@ -231,7 +231,7 @@ class ModelTrainer: num_train_batches:int Number of training batches for epoch. """ - def __init__(self, model, n_epoch=300, validation_dataloader=None, optimizer_opts=dict(name='adam',lr=1e-3,weight_decay=1e-4), scheduler_opts=dict(scheduler='warm_restarts',lr_scheduler_decay=0.5,T_max=10,eta_min=5e-8,T_mult=2), loss_fn='ce', reduction='mean', num_train_batches=None): + def __init__(self, model, n_epoch=300, validation_dataloader=None, optimizer_opts=dict(name='adam',lr=1e-3,weight_decay=1e-4), scheduler_opts=dict(scheduler='warm_restarts',lr_scheduler_decay=0.5,T_max=10,eta_min=5e-8,T_mult=2), loss_fn='ce', reduction='mean', num_train_batches=None, seg_out_class=-1): self.model = model optimizers = {'adam':torch.optim.Adam, 'sgd':torch.optim.SGD} @@ -240,7 +240,11 @@ def __init__(self, model, n_epoch=300, validation_dataloader=None, optimizer_opt if 'name' not in list(optimizer_opts.keys()): optimizer_opts['name']='adam' self.optimizer = optimizers[optimizer_opts.pop('name')](self.model.parameters(),**optimizer_opts) - self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O2') + if torch.cuda.is_available(): + self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O2') + self.cuda=True + else: + self.cuda=False self.scheduler = Scheduler(optimizer=self.optimizer,opts=scheduler_opts) self.n_epoch = n_epoch self.validation_dataloader = validation_dataloader @@ -251,6 +255,7 @@ def __init__(self, model, n_epoch=300, validation_dataloader=None, optimizer_opt self.original_loss_fn = copy.deepcopy(loss_functions[loss_fn]) self.num_train_batches = num_train_batches self.val_loss_fn = copy.deepcopy(loss_functions[loss_fn]) + self.seg_out_class=seg_out_class def calc_loss(self, y_pred, y_true): """Calculates loss supplied in init statement and modified by reweighting. @@ -348,8 +353,11 @@ def loss_backward(self,loss): Torch loss calculated. """ - with amp.scale_loss(loss,self.optimizer) as scaled_loss: - scaled_loss.backward() + if self.cuda: + with amp.scale_loss(loss,self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() #@pysnooper.snoop('train_loop.log') def train_loop(self, epoch, train_dataloader): @@ -493,13 +501,17 @@ def test_loop(self, test_dataloader): if torch.cuda.is_available(): X = X.cuda() if test_dataloader.dataset.segmentation: - prediction=self.model(X).detach().cpu().numpy().argmax(axis=1) + prediction=self.model(X).detach().cpu().numpy() + if self.seg_out_class>=0: + prediction=prediction[:,self.seg_out_class,...] + else: + prediction=prediction.argmax(axis=1).astype(int) pred_size=prediction.shape#size() #pred_mean=prediction[0].mean(axis=0) - y_pred.append((prediction).astype(int)) + y_pred.append(prediction) else: prediction=self.model(X) - if (len(test_dataloader.dataset.targets)-1) or self.bce: + if self.loss_fn_name != 'mse' and ((len(test_dataloader.dataset.targets)-1) or self.bce): prediction=self.sigmoid(prediction) elif test_dataloader.dataset.classify_annotations: prediction=F.softmax(prediction,dim=1) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index cb80f2a..b816c88 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -859,7 +859,7 @@ def fix_names(file_dir): ####### #@pysnooper.snoop('seg2npy.log') -def segmentation_predictions2npy(y_pred, patch_info, segmentation_map, npy_output, original_patch_size=500, resized_patch_size=256): +def segmentation_predictions2npy(y_pred, patch_info, segmentation_map, npy_output, original_patch_size=500, resized_patch_size=256, output_probs=False): """Convert segmentation predictions from model to numpy masks. Parameters @@ -875,11 +875,12 @@ def segmentation_predictions2npy(y_pred, patch_info, segmentation_map, npy_outpu """ import cv2 import copy + print(output_probs) seg_map_shape=segmentation_map.shape[-2:] original_seg_shape=copy.deepcopy(seg_map_shape) if resized_patch_size!=original_patch_size: seg_map_shape = [int(dim*resized_patch_size/original_patch_size) for dim in seg_map_shape] - segmentation_map = np.zeros(tuple(seg_map_shape)) + segmentation_map = np.zeros(tuple(seg_map_shape)).astype(float) for i in range(patch_info.shape[0]): patch_info_i = patch_info.iloc[i] ID = patch_info_i['ID'] @@ -895,4 +896,6 @@ def segmentation_predictions2npy(y_pred, patch_info, segmentation_map, npy_outpu if resized_patch_size!=original_patch_size: segmentation_map=cv2.resize(segmentation_map.astype(float), dsize=original_seg_shape, interpolation=cv2.INTER_NEAREST) os.makedirs(npy_output[:npy_output.rfind('/')],exist_ok=True) - np.save(npy_output,segmentation_map.astype(np.uint8)) + if not output_probs: + segmentation_map=segmentation_map.astype(np.uint8) + np.save(npy_output,segmentation_map) From 811e3820681e5c483245b3dab7aee7e34db3481b Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Tue, 17 Dec 2019 17:26:16 -0800 Subject: [PATCH 16/27] Update cli_visualizations.py --- pathflowai/cli_visualizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathflowai/cli_visualizations.py b/pathflowai/cli_visualizations.py index 91cf3ff..2ad4408 100644 --- a/pathflowai/cli_visualizations.py +++ b/pathflowai/cli_visualizations.py @@ -125,7 +125,7 @@ def plot_embeddings(embeddings_file,plotly_output_file, annotations, remove_back """Perform UMAP embeddings of patches and plot using plotly.""" import torch from umap import UMAP - from visualize import PlotlyPlot + from pathflowai.visualize import PlotlyPlot import pandas as pd, numpy as np embeddings_dict=torch.load(embeddings_file) embeddings=embeddings_dict['embeddings'] From d65a0cbfb81f6cee05725ad766a9ae57e65d6b03 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Thu, 9 Jan 2020 07:37:59 -0800 Subject: [PATCH 17/27] Adding SqueezeNext --- pathflowai/model_training.py | 2 +- pathflowai/models.py | 7 ++++++- setup.py | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pathflowai/model_training.py b/pathflowai/model_training.py index 5f64c7b..ec1c0cf 100644 --- a/pathflowai/model_training.py +++ b/pathflowai/model_training.py @@ -207,7 +207,7 @@ def train_model_(training_opts): @click.option('-fn', '--fix_names', is_flag=True, help='Whether to fix names in dataset_df.', show_default=True) @click.option('-a', '--architecture', default='alexnet', help='Neural Network Architecture.', type=click.Choice(['alexnet', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'inception_v3', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'vgg11', 'vgg11_bn','unet','unet2','nested_unet','fast_scnn', - 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'deeplabv3_resnet101','deeplabv3_resnet50','fcn_resnet101', 'fcn_resnet50']+['efficientnet-b{}'.format(i) for i in range(8)]), show_default=True) + 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'deeplabv3_resnet101','deeplabv3_resnet50','fcn_resnet101', 'fcn_resnet50',"sqnxt23_w3d2", "sqnxt23_w2", "sqnxt23v5_w1", "sqnxt23v5_w3d2", "sqnxt23v5_w2"]+['efficientnet-b{}'.format(i) for i in range(8)]), show_default=True) @click.option('-imb', '--imbalanced_correction', is_flag=True, help='Attempt to correct for imbalanced data.', show_default=True) @click.option('-imb2', '--imbalanced_correction2', is_flag=True, help='Attempt to correct for imbalanced data.', show_default=True) @click.option('-ca', '--classify_annotations', is_flag=True, help='Classify annotations.', show_default=True) diff --git a/pathflowai/models.py b/pathflowai/models.py index cf0528c..f2087f7 100644 --- a/pathflowai/models.py +++ b/pathflowai/models.py @@ -139,6 +139,11 @@ def generate_model(pretrain,architecture,num_classes, add_sigmoid=True, n_hidden else: model = EfficientNet.from_name(architecture, override_params=dict(num_classes=num_classes)) print(model) + elif architecture.startswith('sqnxt'): + from pytorchcv.model_provider import get_model as ptcv_get_model + model = ptcv_get_model(architecture, pretrained=pretrain) + num_ftrs=int(64*int(architecture.split('_')[-1][1])) + model.output=MLP(num_ftrs, [1000], dropout_p=0., n_outputs=num_classes, binary=add_sigmoid, softmax=False).mlp else: #for pretrained on imagenet model_names = [m for m in dir(models) if not m.startswith('__')] @@ -161,7 +166,7 @@ def generate_model(pretrain,architecture,num_classes, add_sigmoid=True, n_hidden #linear_layer = nn.Linear(num_ftrs, num_classes) #torch.nn.init.xavier_uniform(linear_layer.weight) model.fc = MLP(num_ftrs, [1000], dropout_p=0., n_outputs=num_classes, binary=add_sigmoid, softmax=False).mlp#nn.Sequential(*([linear_layer]+([nn.Sigmoid()] if (add_sigmoid) else []))) - elif architecture.startswith('alexnet') or architecture.startswith('vgg') or architecture.startswith('densenets'): + elif architecture.startswith('alexnet') or architecture.startswith('vgg') or architecture.startswith('densenet'): num_ftrs = model.classifier[6].in_features #linear_layer = nn.Linear(num_ftrs, num_classes) #torch.nn.init.xavier_uniform(linear_layer.weight) diff --git a/setup.py b/setup.py index a4006e7..76a05e2 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 'blosc', 'numcodecs', 'zarr', + 'pytorchcv' ] with open('README.md','r', encoding='utf-8') as f: From 574a19ba2edb3bfe7b316c647b90439a86231f87 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Fri, 10 Jan 2020 15:49:46 -0500 Subject: [PATCH 18/27] Update models.py --- pathflowai/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pathflowai/models.py b/pathflowai/models.py index f2087f7..4d64afe 100644 --- a/pathflowai/models.py +++ b/pathflowai/models.py @@ -66,6 +66,9 @@ def __init__(self, n_input, hidden_topology, dropout_p, n_outputs=1, binary=True self.layers.append(nn.Sequential(self.output_layer,output_transform)) self.mlp = nn.Sequential(*self.layers) + def forward(self,x): + return self.mlp(x) + class FixedSegmentationModule(nn.Module): """Special model modification for segmentation tasks. Gets output from some of the models' forward loops. From 9ad06fad55dcb66796252d2f7e060bf3add0b3c7 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Sun, 12 Jan 2020 21:19:26 -0500 Subject: [PATCH 19/27] Update models.py --- pathflowai/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathflowai/models.py b/pathflowai/models.py index 4d64afe..e841a84 100644 --- a/pathflowai/models.py +++ b/pathflowai/models.py @@ -145,7 +145,7 @@ def generate_model(pretrain,architecture,num_classes, add_sigmoid=True, n_hidden elif architecture.startswith('sqnxt'): from pytorchcv.model_provider import get_model as ptcv_get_model model = ptcv_get_model(architecture, pretrained=pretrain) - num_ftrs=int(64*int(architecture.split('_')[-1][1])) + num_ftrs=int(128*int(architecture.split('_')[-1][1])) model.output=MLP(num_ftrs, [1000], dropout_p=0., n_outputs=num_classes, binary=add_sigmoid, softmax=False).mlp else: #for pretrained on imagenet From 5bc00bc3920b520686561babe14af6d92036c4ab Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Wed, 22 Jan 2020 20:54:28 -0500 Subject: [PATCH 20/27] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 51fc6c5..f385ef6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

Welcome to PathFlowAI 👋

+

Welcome to PathFlowAI

Version @@ -10,7 +10,7 @@ ### 🏠 [Homepage](https://github.com/jlevy44/PathFlowAI) -MedRxiv Manuscript: https://www.medrxiv.org/content/10.1101/19003897v1 +Published in the Proceedings of the Pacific Symposium for Biocomputing 2020, Manuscript: https://psb.stanford.edu/psb-online/proceedings/psb20/Levy.pdf ## Install From 8c7032e555b3c78c53face3f9c3602438dc246c4 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 9 Mar 2020 18:16:49 -0400 Subject: [PATCH 21/27] Added hdf5 support and ability in preprocessing to not output zarr format --- pathflowai/cli_preprocessing.py | 15 +++++++++------ pathflowai/model_training.py | 8 +++++++- pathflowai/utils.py | 31 ++++++++++++++++++------------- setup.py | 3 ++- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/pathflowai/cli_preprocessing.py b/pathflowai/cli_preprocessing.py index efedd04..d3590dc 100644 --- a/pathflowai/cli_preprocessing.py +++ b/pathflowai/cli_preprocessing.py @@ -50,10 +50,11 @@ def output_if_exists(filename): @click.option('-nn', '--n_neighbors', default=5, help='If adjusting mask, number of neighbors connectivity to remove.', show_default=True) @click.option('-bp', '--basic_preprocess', is_flag=True, help='Basic preprocessing pipeline, annotation areas are not saved. Used for benchmarking tool against comparable pipelines', show_default=True) @click.option('-ei', '--entire_image', is_flag=True, help='Store entire image in central db rather than patches.', show_default=True) -def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patches,threshold,patch_size, intensity_threshold, generate_finetune_segmentation, target_segmentation_class, target_threshold, out_db, adjust_mask, n_neighbors, basic_preprocess, entire_image): +@click.option('-nz', '--no_zarr', is_flag=True, help='Don\'t save zarr format file.', show_default=True) +def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patches,threshold,patch_size, intensity_threshold, generate_finetune_segmentation, target_segmentation_class, target_threshold, out_db, adjust_mask, n_neighbors, basic_preprocess, entire_image, no_zarr): """Preprocessing pipeline that accomplishes 3 things. 1: storage into ZARR format, 2: optional mask adjustment, 3: storage of patch-level information into SQL DB""" - for ext in ['.npy','.svs','.tiff','.tif', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.svslide', '.bif', '.jpeg', '.png']: + for ext in ['.npy','.svs','.tiff','.tif', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.svslide', '.bif', '.jpeg', '.png', '.h5']: svs_file = output_if_exists(join(input_dir,'{}{}'.format(basename,ext))) if svs_file != None: break @@ -75,13 +76,14 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche npy_mask=npy_mask, annotations=annotations, out_zarr=out_zarr, - out_pkl=out_pkl) + out_pkl=out_pkl, + no_zarr=no_zarr) if npy_mask==None and xml_file==None: npy_mask=join(input_dir,'{}_mask.npz'.format(basename)) target_segmentation_class=1 generate_finetune_segmentation=True - create_zero_mask(npy_mask,out_zarr,out_pkl) + create_zero_mask(npy_mask,out_zarr if no_zarr else svs_file,out_pkl) preprocess_point = time.time() @@ -93,7 +95,7 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche adj_npy=join(adj_dir,os.path.basename(npy_mask)) os.makedirs(adj_dir,exist_ok=True) if not os.path.exists(adj_npy): - adjust_mask(npy_mask, out_zarr, adj_npy, n_neighbors) + adjust_mask(npy_mask, out_zarr if no_zarr else svs_file, adj_npy, n_neighbors) adjust_point = time.time() print('Adjust took {}'.format(adjust_point-preprocess_point)) @@ -111,7 +113,8 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche target_threshold=target_threshold, adj_mask=adj_npy, basic_preprocess=basic_preprocess, - entire_image=entire_image) + entire_image=entire_image, + svs_file=svs_file) patch_point = time.time() print('Patches took {}'.format(patch_point-adjust_point)) diff --git a/pathflowai/model_training.py b/pathflowai/model_training.py index ec1c0cf..26c6b0d 100644 --- a/pathflowai/model_training.py +++ b/pathflowai/model_training.py @@ -169,7 +169,13 @@ def train_model_(training_opts): else: extract_embedding=training_opts['extract_embedding'] if extract_embedding: - trainer.model.fc = trainer.model.fc[0] + architecture=training_opts['architecture'] + if hasattr(trainer.model,"fc"): + trainer.model.fc = trainer.model.fc[0] + elif hasattr(trainer.model,"output"): + trainer.model.output = trainer.model.output[0] + elif architecture.startswith('alexnet') or architecture.startswith('vgg') or architecture.startswith('densenet'): + trainer.model.classifier[6]=trainer.model.classifier[6][0] trainer.bce=False y_pred = trainer.predict(dataloaders['test']) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index b816c88..cca4297 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -35,9 +35,8 @@ #import xarray as xr, sparse import pickle import copy - +import h5py import nonechucks as nc - from nonechucks import SafeDataLoader as DataLoader def load_sql_df(sql_file, patch_size): @@ -224,6 +223,9 @@ def create_sparse_annotation_arrays(xml_file, img_size, annotations=[]): interior_points_dict = {annotation:parse_coord_return_boxes(xml_file, annotation_name = annotation, return_coords = False) for annotation in annotations}#grab_interior_points(xml_file, img_size, annotations=annotations) if annotations else {} return {annotation:interior_points_dict[annotation] for annotation in annotations}#sparse.COO.from_scipy_sparse((sps.coo_matrix(interior_points_dict[annotation],img_size, dtype=np.uint8) if interior_points_dict[annotation] not None else sps.coo_matrix(img_size, dtype=np.uint8)).tocsr()) for annotation in annotations} # [sps.coo_matrix(img_size, dtype=np.uint8)]+ +def load_image(svs_file): + return (npy2da(svs_file) if (svs_file.endswith('.npy') or svs_file.endswith('.h5')) else svs2dask_array(svs_file, tile_size=1000, overlap=0)) + def load_process_image(svs_file, xml_file=None, npy_mask=None, annotations=[]): """Load SVS-like image (including NPY), segmentation/classification annotations, generate dask array and dictionary of annotations. @@ -246,7 +248,7 @@ def load_process_image(svs_file, xml_file=None, npy_mask=None, annotations=[]): Annotation masks. """ - arr = npy2da(svs_file) if svs_file.endswith('.npy') else svs2dask_array(svs_file, tile_size=1000, overlap=0)#load_image(svs_file) + arr = load_image(svs_file)#npy2da(svs_file) if (svs_file.endswith('.npy') or svs_file.endswith('.h5')) else svs2dask_array(svs_file, tile_size=1000, overlap=0)#load_image(svs_file) img_size = arr.shape[:2] masks = {}#{'purple': create_purple_mask(arr,img_size,sparse=False)} if xml_file is not None: @@ -261,7 +263,7 @@ def load_process_image(svs_file, xml_file=None, npy_mask=None, annotations=[]): #arr = da.concatenate([arr,masks.pop('purple')],axis=2) return arr, masks#xr.Dataset.from_dict({k:v for k,v in list(data_arr.items())+list(purple_arr.items())+list(mask_arr.items())})#list(dict(image=data_arr,purple=purple_arr,annotations=mask_arr).items()))#arr, masks -def save_dataset(arr, masks, out_zarr, out_pkl): +def save_dataset(arr, masks, out_zarr, out_pkl, no_zarr): """Saves dask array image, dictionary of annotations to zarr and pickle respectively. Parameters @@ -275,13 +277,14 @@ def save_dataset(arr, masks, out_zarr, out_pkl): out_pkl:str Pickle output file. """ - arr.astype('uint8').to_zarr(out_zarr, overwrite=True) + if no_zarr: + arr.astype('uint8').to_zarr(out_zarr, overwrite=True) pickle.dump(masks,open(out_pkl,'wb')) #dataset.to_netcdf(out_netcdf, compute=False) #pickle.dump(dataset, open(out_pkl,'wb'), protocol=-1) -def run_preprocessing_pipeline(svs_file, xml_file=None, npy_mask=None, annotations=[], out_zarr='output_zarr.zarr', out_pkl='output.pkl'): +def run_preprocessing_pipeline(svs_file, xml_file=None, npy_mask=None, annotations=[], out_zarr='output_zarr.zarr', out_pkl='output.pkl',no_zarr=False): """Run preprocessing pipeline. Store image into zarr format, segmentations maintain as npy, and xml annotations as pickle. Parameters @@ -301,7 +304,7 @@ def run_preprocessing_pipeline(svs_file, xml_file=None, npy_mask=None, annotatio """ #save_dataset(load_process_image(svs_file, xml_file, npy_mask, annotations), out_netcdf) arr, masks = load_process_image(svs_file, xml_file, npy_mask, annotations) - save_dataset(arr, masks,out_zarr, out_pkl) + save_dataset(arr, masks,out_zarr, out_pkl, no_zarr) ################### @@ -380,7 +383,7 @@ def load_dataset(in_zarr, in_pkl): Annotations dictionary. """ - return da.from_zarr(in_zarr), pickle.load(open(in_pkl,'rb'))#xr.open_dataset(in_netcdf) + return (da.from_zarr(in_zarr) if in_zarr.endswith('.zarr') else load_image(in_zarr)), pickle.load(open(in_pkl,'rb'))#xr.open_dataset(in_netcdf) def is_valid_patch(xs,ys,patch_size,purple_mask,intensity_threshold,threshold=0.5): """Deprecated, computes whether patch is valid.""" @@ -400,7 +403,7 @@ def fix_polygon(poly): return poly #@pysnooper.snoop("extract_patch.log") -def extract_patch_information(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0., adj_mask='', basic_preprocess=False, tries=0, entire_image=False): +def extract_patch_information(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0., adj_mask='', basic_preprocess=False, tries=0, entire_image=False, svs_file=''): """Final step of preprocessing pipeline. Break up image into patches, include if not background and of a certain intensity, find area of each annotation type in patch, spatial information, image ID and dump data to SQL table. Parameters @@ -459,8 +462,9 @@ def extract_patch_information(basename, input_dir='./', annotations=[], threshol #cluster.adapt(minimum=10, maximum=100) #cluster = LocalCluster(threads_per_worker=1, n_workers=20, memory_limit="80G") #client=Client()#Client(cluster)#processes=True)#cluster, - - arr, masks = load_dataset(join(input_dir,'{}.zarr'.format(basename)),join(input_dir,'{}_mask.pkl'.format(basename))) + in_zarr=join(input_dir,'{}.zarr'.format(basename)) + in_zarr=(in_zarr if os.path.exists(in_zarr) else svs_file) + arr, masks = load_dataset(in_zarr,join(input_dir,'{}_mask.pkl'.format(basename))) if 'annotations' in masks: segmentation = True @@ -705,10 +709,11 @@ def npy2da(npy_file): arr=da.from_array(np.load(npy_file, mmap_mode = 'r+')) else: npy_file=npy_file.replace('.npy','.npz') - if npy_file.endswith('.npz'): + elif npy_file.endswith('.npz'): from scipy.sparse import load_npz arr=da.from_array(load_npz(npy_file).toarray()) - + elif npy_file.endswith('.h5'): + arr=da.from_array(h5py.File(savename, 'r')['dataset']) return arr def grab_interior_points(xml_file, img_size, annotations=[]): diff --git a/setup.py b/setup.py index 76a05e2..9d12e57 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,8 @@ 'blosc', 'numcodecs', 'zarr', - 'pytorchcv' + 'pytorchcv', + 'h5py' ] with open('README.md','r', encoding='utf-8') as f: From 176b099146d1859369366f9540268c0ea5306713 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 9 Mar 2020 18:40:31 -0400 Subject: [PATCH 22/27] Update utils.py --- pathflowai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index cca4297..ca9b93d 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -713,7 +713,7 @@ def npy2da(npy_file): from scipy.sparse import load_npz arr=da.from_array(load_npz(npy_file).toarray()) elif npy_file.endswith('.h5'): - arr=da.from_array(h5py.File(savename, 'r')['dataset']) + arr=da.from_array(h5py.File(npy_file, 'r')['dataset']) return arr def grab_interior_points(xml_file, img_size, annotations=[]): From 7ff099037b829dd952bc9148248639ffeaf25300 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 9 Mar 2020 18:45:48 -0400 Subject: [PATCH 23/27] Update utils.py --- pathflowai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index ca9b93d..48d7a0f 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -277,7 +277,7 @@ def save_dataset(arr, masks, out_zarr, out_pkl, no_zarr): out_pkl:str Pickle output file. """ - if no_zarr: + if not no_zarr: arr.astype('uint8').to_zarr(out_zarr, overwrite=True) pickle.dump(masks,open(out_pkl,'wb')) From 0e9d9c863f2193493c4eafe0ccb2c321539abf34 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 9 Mar 2020 18:48:42 -0400 Subject: [PATCH 24/27] Update utils.py --- pathflowai/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index 48d7a0f..a342f38 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -535,7 +535,7 @@ def extract_patch_information(basename, input_dir='./', annotations=[], threshol print(patch_info) return patch_info -def generate_patch_pipeline(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, out_db='patch_info.db', generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0., adj_mask='', basic_preprocess=False, entire_image=False): +def generate_patch_pipeline(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, out_db='patch_info.db', generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0., adj_mask='', basic_preprocess=False, entire_image=False,svs_file=''): """Find area coverage of each annotation in each patch and store patch information into SQL db. Parameters @@ -565,7 +565,7 @@ def generate_patch_pipeline(basename, input_dir='./', annotations=[], threshold= basic_preprocess:bool Do not store patch level information. """ - patch_info = extract_patch_information(basename, input_dir, annotations, threshold, patch_size, generate_finetune_segmentation=generate_finetune_segmentation, target_class=target_class, intensity_threshold=intensity_threshold, target_threshold=target_threshold, adj_mask=adj_mask, basic_preprocess=basic_preprocess, entire_image=entire_image) + patch_info = extract_patch_information(basename, input_dir, annotations, threshold, patch_size, generate_finetune_segmentation=generate_finetune_segmentation, target_class=target_class, intensity_threshold=intensity_threshold, target_threshold=target_threshold, adj_mask=adj_mask, basic_preprocess=basic_preprocess, entire_image=entire_image,svs_file=svs_file) conn = sqlite3.connect(out_db) patch_info.to_sql(str(patch_size), con=conn, if_exists='append') conn.close() From a678ef21a62130ce98594cd2cecb03652e354eb4 Mon Sep 17 00:00:00 2001 From: jlevy44 Date: Mon, 9 Mar 2020 18:59:24 -0400 Subject: [PATCH 25/27] Debug no annotations --- pathflowai/cli_preprocessing.py | 1 + pathflowai/utils.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pathflowai/cli_preprocessing.py b/pathflowai/cli_preprocessing.py index d3590dc..a020b26 100644 --- a/pathflowai/cli_preprocessing.py +++ b/pathflowai/cli_preprocessing.py @@ -80,6 +80,7 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche no_zarr=no_zarr) if npy_mask==None and xml_file==None: + print('Generating Zero Mask') npy_mask=join(input_dir,'{}_mask.npz'.format(basename)) target_segmentation_class=1 generate_finetune_segmentation=True diff --git a/pathflowai/utils.py b/pathflowai/utils.py index a342f38..927e50e 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -383,7 +383,11 @@ def load_dataset(in_zarr, in_pkl): Annotations dictionary. """ - return (da.from_zarr(in_zarr) if in_zarr.endswith('.zarr') else load_image(in_zarr)), pickle.load(open(in_pkl,'rb'))#xr.open_dataset(in_netcdf) + if not os.path.exists(in_pkl): + annotations={'annotations':''} + else: + annotations=pickle.load(open(in_pkl,'rb')) + return (da.from_zarr(in_zarr) if in_zarr.endswith('.zarr') else load_image(in_zarr)), annotations#xr.open_dataset(in_netcdf) def is_valid_patch(xs,ys,patch_size,purple_mask,intensity_threshold,threshold=0.5): """Deprecated, computes whether patch is valid.""" @@ -453,7 +457,7 @@ def extract_patch_information(basename, input_dir='./', annotations=[], threshol from functools import reduce #from distributed import Client,LocalCluster max_tries=4 - kargs=dict(basename=basename, input_dir=input_dir, annotations=annotations, threshold=threshold, patch_size=patch_size, generate_finetune_segmentation=generate_finetune_segmentation, target_class=target_class, intensity_threshold=intensity_threshold, target_threshold=target_threshold, adj_mask=adj_mask, basic_preprocess=basic_preprocess, tries=tries) + kargs=dict(basename=basename, input_dir=input_dir, annotations=annotations, threshold=threshold, patch_size=patch_size, generate_finetune_segmentation=generate_finetune_segmentation, target_class=target_class, intensity_threshold=intensity_threshold, target_threshold=target_threshold, adj_mask=adj_mask, basic_preprocess=basic_preprocess, tries=tries, svs_file=svs_file) try: #, # 'distributed.scheduler.allowed-failures':20, From 7ab3722e5d562bc01abf81c9888a8ba20dbc2eef Mon Sep 17 00:00:00 2001 From: Joshua Levy Date: Mon, 9 Mar 2020 19:08:14 -0400 Subject: [PATCH 26/27] Update cli_preprocessing.py --- pathflowai/cli_preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pathflowai/cli_preprocessing.py b/pathflowai/cli_preprocessing.py index a020b26..04bb5f5 100644 --- a/pathflowai/cli_preprocessing.py +++ b/pathflowai/cli_preprocessing.py @@ -84,7 +84,7 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche npy_mask=join(input_dir,'{}_mask.npz'.format(basename)) target_segmentation_class=1 generate_finetune_segmentation=True - create_zero_mask(npy_mask,out_zarr if no_zarr else svs_file,out_pkl) + create_zero_mask(npy_mask,out_zarr if not no_zarr else svs_file,out_pkl) preprocess_point = time.time() @@ -96,7 +96,7 @@ def preprocess_pipeline(img2npy,basename,input_dir,annotations,preprocess,patche adj_npy=join(adj_dir,os.path.basename(npy_mask)) os.makedirs(adj_dir,exist_ok=True) if not os.path.exists(adj_npy): - adjust_mask(npy_mask, out_zarr if no_zarr else svs_file, adj_npy, n_neighbors) + adjust_mask(npy_mask, out_zarr if not no_zarr else svs_file, adj_npy, n_neighbors) adjust_point = time.time() print('Adjust took {}'.format(adjust_point-preprocess_point)) From 234bca664489a0b1ddbea35a86d12b91870604b2 Mon Sep 17 00:00:00 2001 From: Joshua Levy Date: Mon, 9 Mar 2020 19:16:02 -0400 Subject: [PATCH 27/27] Update utils.py --- pathflowai/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pathflowai/utils.py b/pathflowai/utils.py index 927e50e..3f4001b 100644 --- a/pathflowai/utils.py +++ b/pathflowai/utils.py @@ -471,9 +471,10 @@ def extract_patch_information(basename, input_dir='./', annotations=[], threshol arr, masks = load_dataset(in_zarr,join(input_dir,'{}_mask.pkl'.format(basename))) if 'annotations' in masks: segmentation = True - #if generate_finetune_segmentation: - segmentation_mask = npy2da(join(input_dir,'{}_mask.npy'.format(basename)) if not adj_mask else adj_mask) + mask=join(input_dir,'{}_mask.npy'.format(basename)) + mask = (mask if os.path.exists(mask) else mask.replace('.npy','.npz')) + segmentation_mask = (npy2da(mask) if not adj_mask else adj_mask) else: segmentation = False annotations=list(annotations)