From c9a42a311d8adb813f38ad77f18e6ea9b8703886 Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Mon, 25 May 2020 14:23:58 +0200 Subject: [PATCH 1/8] Updated main.py to match Mathias main.py script (lymphome noise project testing) --- scripts/main.py | 88 ++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/scripts/main.py b/scripts/main.py index 4b2c129..ac839dc 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,35 +1,42 @@ -from CAAI.train import CNN -from CAAI.predict import CNN as TrainedModel +# Import python libraries: +from train import CNN +from predict import CNN as TrainedModel import pickle, os import numpy as np from data_generator import DataGenerator import pyminc.volumes.factory as pyminc +data_path = '/homes/kovacs/project_data/lymphoma-auto-contouring/' + +# Define training function: def train_v1(): - cnn = CNN(model_name='v1', - input_patch_shape=(128,128,16), - input_channels=2, - output_channels=1, - batch_size=2, - epochs=2000, - learning_rate=1e-4, - checkpoint_save_rate=50, - loss_functions=[['mean_absolute_error',1]], - data_pickle='/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc/data_6fold.pickle', - data_folder='/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc', - data_pickle_kfold=1 + # Parameter definition for the network. These must be changed to fit your own data. Other parameters exist. See train.py + cnn = CNN(model_name='v2_test_david', + input_patch_shape=(256,256,8), # Dimensions of model input in the form (x,y,z) + input_channels=2, # Number of model input channels. In this case, PET in 1st channel, CT in 2nd channel. + output_channels=1, # Number of model output channels. In this case, a PET image as output. + batch_size=1, # Number of inputs used in batch normalization. 1 = no batch normalization. + epochs=100, # Number of training epochs. A stopping critera can also be implemented. + learning_rate=1e-4, # Set learning rate. + checkpoint_save_rate=50, # Saves model each # epoch. + loss_functions=[['mean_absolute_error',1]], # Define loss function + data_pickle= data_path+'patient_data_preprocessed/data_2fold.pickle', # K-fold split file + data_folder= data_path+'patient_data_preprocessed', + data_pickle_kfold=0 # K-fold. For 2-fold validation, each fold must be run subsequently. (First fold 0, then fold 1) ) # Attach generator - cnn.data_loader = DataGenerator(cnn.config) + cnn.data_loader = DataGenerator(cnn.config) # Load data generator - cnn.print_config() + cnn.print_config() # Print network configurations - final_model_name = cnn.train() + final_model_name = cnn.train() # Create training network return final_model_name - + + +# Define main prediction function: def predict(modelh5name, model_name=None): modelbasename = os.path.splitext(os.path.basename(modelh5name))[0] @@ -39,40 +46,45 @@ def predict(modelh5name, model_name=None): if model_name: modelbasename = model_name - summary = pickle.load( open('/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc/data_6fold.pickle', 'rb') ) - for pt in summary['valid_1']: + # Load k-fold file. + summary = pickle.load( open(data_path+'patient_data_preprocessed/data_2fold.pickle', 'rb') ) + for pt in summary['valid_0']: # K-fold. For 2-fold validation, each fold must be run subsequently. 
(First valid_0, then valid_1) predict_patient(pt,model,modelbasename) + +# Define single patient prediction function: def predict_patient(pt,model,modelbasename): - _lowdose_name = "FDG_01_SUV.mnc" - data_folder = '/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc' - fname_dat = os.path.join(data_folder,pt,'dat_01_suv_ctnorm_double.npy') + _lowdose_name = "minc/"+pt+"PET_TrueX1_reshaped.mnc" + data_folder = data_path+'patient_data_preprocessed' + fname_dat = os.path.join(data_folder,pt,'minc/dat_256_truex1_256_CT.npy') dat = np.memmap(fname_dat, dtype='double', mode='r') - dat = dat.reshape(128,128,-1,2) + dat = dat.reshape(256,256,-1,2) print("Predicting volume for %s" % pt) - predicted = np.empty((111,128,128)) - x = 128 - y = 128 - z = 16 - d = 2 - for z_index in range(int(z/2),111-int(z/2)): - predicted_stack = model.predict(dat[:,:,z_index-int(z/2):z_index+int(z/2),:].reshape(1,x,y,z,d)) - if z_index == int(z/2): + predicted = np.empty((111,256,256)) # Create empty matrix, which will be filled with predicted data. + x,y,z,d = 256,256,8,2 # Define dimensions. + + for z_index in range(int(z/2),111-int(z/2)): # Loop over empty matrix. + predicted_stack = model.predict(dat[:,:,z_index-int(z/2):z_index+int(z/2),:].reshape(1,x,y,z,d)) # Predict data slice. + if z_index == int(z/2): # Handle edge case. for ind in range(int(z/2)): - predicted[ind,:,:] = predicted_stack[0,:,:,ind].reshape(128,128) - if z_index == 111-int(z/2)-1: + predicted[ind,:,:] = predicted_stack[0,:,:,ind].reshape(256,256) # Fill out matrix with prediction. + if z_index == 111-int(z/2)-1: # Handle edge case. for ind in range(int(z/2)): - predicted[z_index+ind,:,:] = predicted_stack[0,:,:,int(z/2)+ind].reshape(128,128) - predicted[z_index,:,:] = predicted_stack[0,:,:,int(z/2)].reshape(128,128) + predicted[z_index+ind,:,:] = predicted_stack[0,:,:,int(z/2)+ind].reshape(256,256) # Fill out matrix with prediction. + predicted[z_index,:,:] = predicted_stack[0,:,:,int(z/2)].reshape(256,256) # Fill out matrix with prediction. predicted_full = predicted predicted_full += np.swapaxes(np.swapaxes(dat[:,:,:,0],2,1),1,0) - out_vol = pyminc.volumeLikeFile(os.path.join(data_folder,pt,_lowdose_name),os.path.join(data_folder,pt,'predicted_'+modelbasename+'_'+_lowdose_name)) + # Create minc file of predicted data. 
+ out_vol = pyminc.volumeLikeFile(os.path.join(data_folder,pt,_lowdose_name),os.path.join(data_folder,pt,"minc",'predicted_'+modelbasename+'_'+_lowdose_name[5:])) out_vol.data = predicted_full out_vol.writeFile() out_vol.closeVolume() + +# Start training of model and prediction of test data: if __name__ == '__main__': model_name = train_v1() - predict(model_name) \ No newline at end of file + predict(model_name) + #predict('v1_e10_bz1_lr1.0E-04_noDA_noTL_LOG0.h5') \ No newline at end of file From 495f2fb248dc6a15ab0b0272a5e95d9f29fc54de Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Mon, 25 May 2020 14:33:16 +0200 Subject: [PATCH 2/8] Updated all scripts in CAAI/CNN/scipts and CAAI/CNN/pyhtontoolkit to match Mathias version --- pythontoolkit/losses.py | 4 +-- pythontoolkit/networks.py | 37 +++++++++++---------- pythontoolkit/predict.py | 1 - pythontoolkit/train.py | 36 ++++++++++---------- scripts/data_generator.py | 34 ++++++++----------- scripts/generate_data_pickle.py | 59 +++++++++------------------------ 6 files changed, 70 insertions(+), 101 deletions(-) diff --git a/pythontoolkit/losses.py b/pythontoolkit/losses.py index 278d5e0..685f51d 100644 --- a/pythontoolkit/losses.py +++ b/pythontoolkit/losses.py @@ -1,5 +1,5 @@ from keras import backend as K +# Define root mean sqared error loss function. def rmse(y_true, y_pred): - return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) - + return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) \ No newline at end of file diff --git a/pythontoolkit/networks.py b/pythontoolkit/networks.py index 49124d5..75cfdf6 100644 --- a/pythontoolkit/networks.py +++ b/pythontoolkit/networks.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Created on Thu May 24 10:57:19 2018 @@ -15,7 +13,10 @@ warnings.filterwarnings('ignore') +# Define u-net structure. 
def unet(X, f, dims_out): + + # Define convolution block: def conv_block(layer,fsize,dropout,downsample=True): for i in range(1,3): layer = Conv3D(fsize, kernel_size=3, kernel_regularizer=regularizers.l2(1e-1), @@ -30,6 +31,7 @@ def conv_block(layer,fsize,dropout,downsample=True): downsample = Activation('relu')(downsample) return layer, downsample + # Define transposed convolution block: def convt_block(layer, concat, fsize): layer = Conv3DTranspose(fsize, kernel_size=3, kernel_regularizer=regularizers.l2(1e-1), kernel_initializer='he_normal', padding='same', strides=2)(layer) @@ -38,26 +40,27 @@ def convt_block(layer, concat, fsize): layer = concatenate([layer, concat], axis=-1) return layer + # Dropout values + dropout = [.1,.1,.2,.3,.2,.2,.1] + # ENCODING - block1, dblock1 = conv_block(X,f,.1) - block2, dblock2 = conv_block(dblock1,f*2**1,.1) - block3, dblock3 = conv_block(dblock2,f*2**2,.2) - block4, dblock4 = conv_block(dblock3,f*2**3,.2) - block5, _ = conv_block(dblock4,f*2**4,.3,downsample=False) + block1, dblock1 = conv_block(X,f,dropout[0]) + block2, dblock2 = conv_block(dblock1,f*2**1,dropout[1]) + block3, dblock3 = conv_block(dblock2,f*2**2,dropout[2]) + block4, _ = conv_block(dblock3,f*2**3,dropout[3],downsample=False) # DECODING - block7 = convt_block(block5,block4,f*2**3) - block8, _ = conv_block(block7,f*2**3,.3,downsample=False) + block5 = convt_block(block4,block3,f*2**2) + block6, _ = conv_block(block5,f*2**2,dropout[4],downsample=False) - block9 = convt_block(block8,block3,f*2**2) - block10, _ = conv_block(block9,f*2**2,.2,downsample=False) + block7 = convt_block(block6,block2,f*2**1) + block8, _ = conv_block(block7,f*2**1,dropout[5],downsample=False) - block11 = convt_block(block10,block2,f*2**1) - block12, _ = conv_block(block11,f*2**1,.2,downsample=False) - - block13 = convt_block(block12,block1,f) - block14, _ = conv_block(block13,f,.1,downsample=False) + block9 = convt_block(block8,block1,f) + block10, _ = conv_block(block9,f,dropout[6],downsample=False) output = Conv3D(dims_out,kernel_size=3, kernel_regularizer=regularizers.l2(1e-1), - kernel_initializer='he_normal', padding='same',strides=1, activation='relu')(block14) + kernel_initializer='he_normal', padding='same',strides=1, activation='relu')(block10) + return output + diff --git a/pythontoolkit/predict.py b/pythontoolkit/predict.py index 795e167..b58487a 100644 --- a/pythontoolkit/predict.py +++ b/pythontoolkit/predict.py @@ -17,7 +17,6 @@ def __init__(self,model,config=None,custom_objects={}): else: self.model = load_model(model,custom_objects=custom_objects) - def load_model_w_json(self,model): modelh5name = os.path.join( os.path.dirname(model), os.path.splitext(os.path.basename(model))[0]+'.h5' ) json_file = open(model,'r') diff --git a/pythontoolkit/train.py b/pythontoolkit/train.py index a32d24d..ef9a157 100644 --- a/pythontoolkit/train.py +++ b/pythontoolkit/train.py @@ -1,9 +1,10 @@ +# Import python libraries: import warnings warnings.filterwarnings('ignore') import os import pickle from glob import glob -from CAAI import networks +import networks import json from keras.callbacks import ModelCheckpoint, TensorBoard from keras.layers import Input @@ -11,27 +12,19 @@ from keras.optimizers import Adam - -""" - -TODO: - - Check content of existing CONFIG matches new run if continue - - Delete checkpoints if running overwriting already trained model. - - Generate data pickle file - - -""" - +# Define Convolutional Neural Network class. 
class CNN(): + + # Define default configurations, which will be used if no other configurations are defined. def __init__(self,**kwargs): self.config = dict() self.config["model_name"] = 'PROJECT_NAME_WITH_VERSION_NUMBER' self.config["overwrite"] = False - self.config["input_patch_shape"] = (16,192,240) + self.config["input_patch_shape"] = (8,256,256) self.config["input_channels"] = 2 self.config["output_channels"] = 1 - self.config["batch_size"] = 2 - self.config["epochs"] = 100 + self.config["batch_size"] = 1 + self.config["epochs"] = 1000 self.config["checkpoint_save_rate"] = 10 self.config["initial_epoch"] = 0 self.config["learning_rate"] = 1e-4 @@ -39,7 +32,7 @@ def __init__(self,**kwargs): self.config["data_pickle"] = '' # Path to pickle containing train/validation splits self.config["data_pickle_kfold"] = None # Set to fold if k-fold training is applied (key will e.g. be train_0 and valid_0) self.config["pretrained_model"] = None # If transfer learning from other model (not used if resuming training, but keep for model_name's sake) - self.config["augmentation"] = True + self.config["augmentation"] = False self.config["augmentation_params"] = { #'rotation_range': [5,5,5], 'shift_range': [0.05,0.05,0.05], @@ -57,7 +50,7 @@ def __init__(self,**kwargs): # Config specific for network architecture self.config["network_architecture"] = 'unet' - self.config['n_base_filters'] = 32 + self.config['n_base_filters'] = 64 self.custom_network_architecture = None # Metrics and loss functions @@ -98,6 +91,7 @@ def setup_callbacks(self): TB = TensorBoard(log_dir = TB_file) return [checkpoint, TB] + def compile_network(self): @@ -130,6 +124,7 @@ def compile_network(self): self.model.compile(loss = loss, loss_weights = loss_weights, optimizer = optimizer, metrics=self.metrics) self.is_compiled = True + def load_model_w_json(self,model): modelh5name = os.path.join( os.path.dirname(model), os.path.splitext(os.path.basename(model))[0]+'.h5' ) @@ -140,6 +135,7 @@ def load_model_w_json(self,model): model.load_weights(modelh5name) return model + def build_network(self,inputs=None): if not inputs: inputs = Input(shape=self.config['input_patch_shape']+(self.config['input_channels'],)) @@ -156,6 +152,7 @@ def build_network(self,inputs=None): return Model(inputs=inputs,outputs=outputs) + def generate_model_name_from_params(self): # Build full model name model_name = self.config['model_name'] @@ -169,11 +166,13 @@ def generate_model_name_from_params(self): return model_name + def get_initial_epoch_from_file(self,f): last_epoch = f.split('/')[-1].split('_')[0] assert last_epoch.startswith('e') # check that it is indeed the epoch part of the name that we extract return int(last_epoch[1:]) # extract only integer part of eXXX + def check_model_existance(self): # Check if config file already exists if os.path.exists( 'configs/{}.pkl'.format( self.config['model_name'] ) ): @@ -198,12 +197,15 @@ def check_model_existance(self): # else -> model exists but we specified to overwrite, so we do so, without loading from the checkpoint folder. # OBS: The checkpoints should probably be cleared before starting? 
+ def print_config(self): print(json.dumps(self.config, indent = 4)) + def set(self,key,value): self.config[key] = value + def train(self): # Compile network if it has not been done: diff --git a/scripts/data_generator.py b/scripts/data_generator.py index 25ff64e..4727040 100644 --- a/scripts/data_generator.py +++ b/scripts/data_generator.py @@ -1,17 +1,8 @@ - -from CAAI.DataAugmentation3D import DataAugmentation3D +# Import python libraries: +from DataAugmentation3D import DataAugmentation3D import pickle import numpy as np -""" - -############## -Data Generator -############## - -Please update the dat and tgt filenames, as well as matrix size and how the stacks are extracted - -""" class DataGenerator(): @@ -27,9 +18,9 @@ def __init__(self, config): self.summary = pickle.load( open(config['data_pickle'], 'rb') ) self.data_folder = config['data_folder'] - # HARCODED FILENAMES! - self.dat_name = 'dat_01_suv_ctnorm_double.npy' - self.tgt_name = 'res_01_suv_double.npy' + # HARCODED FILENAMES. Change to fit your own data. + self.dat_name = 'minc/dat_256_truex1_256_CT.npy' + self.tgt_name = 'minc/res_256_truex1_256_CT.npy' self.n_batches = len(self.summary['train']) if 'train' in self.summary else len(self.summary['train_0']) self.n_batches /= self.batch_size @@ -69,18 +60,21 @@ def load(self, mode, z=None, return_studyid=False, load_mode='npy'): tgt = np.load(fname_tgt) elif load_mode == 'memmap': dat = np.memmap(fname_dat, dtype='double', mode='r') - dat = dat.reshape(128,128,-1,2) + dat = dat.reshape(256,256,-1,2) tgt = np.memmap(fname_tgt, dtype='double', mode='r') - tgt = tgt.reshape(128,128,-1) + tgt = tgt.reshape(256,256,-1) # --- Determine slice if z == None: - z = np.random.randint(8,111-8,1)[0] + z = np.random.randint(4,111-4,1)[0] - dat_stack = dat[:,:,z-8:z+8,:] - tgt_stack = tgt[:,:,z-8:z+8] + dat_stack = dat[:,:,z-4:z+4,:] + tgt_stack = tgt[:,:,z-4:z+4] if return_studyid: return dat_stack, tgt_stack, stats else: - return dat_stack, tgt_stack \ No newline at end of file + return dat_stack, tgt_stack + + + \ No newline at end of file diff --git a/scripts/generate_data_pickle.py b/scripts/generate_data_pickle.py index 4920e2e..d1ff8d1 100644 --- a/scripts/generate_data_pickle.py +++ b/scripts/generate_data_pickle.py @@ -1,57 +1,28 @@ -# -*- coding: utf-8 -*- - -import pickle +# Import python libraries: +from sklearn.model_selection import KFold +import numpy as np import os +import pickle -""" - -Save pickle file of train and validation pts -Should have indexes "train" and "test" or "train_X" and "valid_X" -where X is integer from 0, representing the LOG in k-fold. - -""" - -summary = { 'train': [], 'valid': [] } - -pts = os.listdir('/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc') - -summary['train'].append(pts[0]) -summary['train'].append(pts[1]) -summary['valid'].append(pts[10]) -summary['valid'].append(pts[11]) - -with open('test_dat.pickle', 'wb') as file_pi: - pickle.dump(summary,file_pi) - - -""" - -############################################################# - -Example below show code for splitting 6 fold cross validation -Note: No exsplicit care is taken so that double scan of patients are not both in train and validation. - If that is the case for your project - this has to be handled !! 
- -############################################################# -datafolder='/users/claes/projects/LowdosePET/PETrecon/HjerteFDG_mnc' +# Create 2-fold split of the example dataset: +datafolder='/users/mathias/deeplearning_guide/patient_data_only_dicom' -patients = [f for f in os.listdir(datafolder) if os.path.exists(os.path.join(datafolder,f,'FDG_100_SUV.mnc'))] +patients = [f for f in os.listdir(datafolder)] patients = np.array(patients) -from sklearn.model_selection import KFold -import numpy as np -import os -import pickle - -kf = KFold(n_splits=6,shuffle=True) +# Define splits +kf = KFold(n_splits=2,shuffle=True) kf.get_n_splits(patients) +# Fill out split dictionary data = {} for G, (train,test) in enumerate(kf.split(patients)): print(G,len(train),len(test)) data['train_%d' % G] = patients[train] data['valid_%d' % G] = patients[test] - -pickle.dump( data, open('data_6fold.pickle','wb') ) -""" \ No newline at end of file + +# Save k-fold split file +pickle.dump( data, open('/users/mathias/deeplearning_guide/patient_data_only_dicom/data_2fold.pickle','wb') ) + + From 4509f206916c41d1af7d920ede224e9ed5af2499 Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Mon, 25 May 2020 14:54:53 +0200 Subject: [PATCH 3/8] added reinstall.sh file --- reinstall.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 reinstall.sh diff --git a/reinstall.sh b/reinstall.sh new file mode 100644 index 0000000..531cde6 --- /dev/null +++ b/reinstall.sh @@ -0,0 +1,6 @@ +#reinstall the CNN package +#by running ./reinstall.sh in command-line +mkdir build +cd build +cmake .. +make install \ No newline at end of file From b0298cb80715ad318053832963b359ce17c563c4 Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Mon, 25 May 2020 15:24:15 +0200 Subject: [PATCH 4/8] updated files to refer to CAAI package at /homes/kovacs/toolbox --- pythontoolkit/train.py | 2 +- reinstall.sh | 6 ------ scripts/data_generator.py | 2 +- scripts/main.py | 4 ++-- 4 files changed, 4 insertions(+), 10 deletions(-) delete mode 100644 reinstall.sh diff --git a/pythontoolkit/train.py b/pythontoolkit/train.py index ef9a157..c579571 100644 --- a/pythontoolkit/train.py +++ b/pythontoolkit/train.py @@ -4,7 +4,7 @@ import os import pickle from glob import glob -import networks +import CAAI.networks import json from keras.callbacks import ModelCheckpoint, TensorBoard from keras.layers import Input diff --git a/reinstall.sh b/reinstall.sh deleted file mode 100644 index 531cde6..0000000 --- a/reinstall.sh +++ /dev/null @@ -1,6 +0,0 @@ -#reinstall the CNN package -#by running ./reinstall.sh in command-line -mkdir build -cd build -cmake .. 
-make install \ No newline at end of file diff --git a/scripts/data_generator.py b/scripts/data_generator.py index 4727040..af6b39d 100644 --- a/scripts/data_generator.py +++ b/scripts/data_generator.py @@ -1,5 +1,5 @@ # Import python libraries: -from DataAugmentation3D import DataAugmentation3D +from CAAI.DataAugmentation3D import DataAugmentation3D import pickle import numpy as np diff --git a/scripts/main.py b/scripts/main.py index ac839dc..1d07538 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,6 +1,6 @@ # Import python libraries: -from train import CNN -from predict import CNN as TrainedModel +from CAAI.train import CNN +from CAAI.predict import CNN as TrainedModel import pickle, os import numpy as np from data_generator import DataGenerator From 62e8488950b16310d435de5bddc2f9b5b2b799a9 Mon Sep 17 00:00:00 2001 From: davidkvcs <37078212+davidkvcs@users.noreply.github.com> Date: Mon, 25 May 2020 15:28:41 +0200 Subject: [PATCH 5/8] adding the reinstall file to git --- reinstall.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 reinstall.sh diff --git a/reinstall.sh b/reinstall.sh new file mode 100644 index 0000000..eeea5b4 --- /dev/null +++ b/reinstall.sh @@ -0,0 +1,6 @@ +#reinstall the CNN package +#by running ./reinstall.sh in command-line +mkdir build +cd build +cmake .. +make install From 3aea47160c197036ec8c997db3c0fda55a961b53 Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Wed, 27 May 2020 09:41:42 +0200 Subject: [PATCH 6/8] Changed line 7 in train.py to "crom CAAI import networks" --- pythontoolkit/train.py | 2 +- reinstall.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 reinstall.sh diff --git a/pythontoolkit/train.py b/pythontoolkit/train.py index c579571..791c3de 100644 --- a/pythontoolkit/train.py +++ b/pythontoolkit/train.py @@ -4,7 +4,7 @@ import os import pickle from glob import glob -import CAAI.networks +from CAAI import networks import json from keras.callbacks import ModelCheckpoint, TensorBoard from keras.layers import Input diff --git a/reinstall.sh b/reinstall.sh old mode 100644 new mode 100755 From 15938d1de3b7f9032a9f0597af818ea9110b6e23 Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Wed, 27 May 2020 18:32:34 +0200 Subject: [PATCH 7/8] added import tensorflow as tf and tf.keras... to all files where necessary (dataaug, losses, networks, predict, train) --- pythontoolkit/DataAugmentation3D.py | 5 +++-- pythontoolkit/losses.py | 3 ++- pythontoolkit/networks.py | 12 ++++++------ pythontoolkit/predict.py | 3 ++- pythontoolkit/train.py | 9 +++++---- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pythontoolkit/DataAugmentation3D.py b/pythontoolkit/DataAugmentation3D.py index fc8f1c0..ed3dddb 100755 --- a/pythontoolkit/DataAugmentation3D.py +++ b/pythontoolkit/DataAugmentation3D.py @@ -5,11 +5,12 @@ Fairly basic set of tools for real-time data augmentation on the volumetric data. Extended for 3D objects augmentation. """ +import tensorflow as tf -import keras.backend as K +import tf.keras.backend as K import numpy as np import scipy.ndimage -from keras.utils.data_utils import Sequence +from tf.keras.utils.data_utils import Sequence from scipy import linalg from six.moves import range diff --git a/pythontoolkit/losses.py b/pythontoolkit/losses.py index 685f51d..d3a3a6d 100644 --- a/pythontoolkit/losses.py +++ b/pythontoolkit/losses.py @@ -1,4 +1,5 @@ -from keras import backend as K +import tensorflow as tf +from tf.keras import backend as K # Define root mean sqared error loss function. 
def rmse(y_true, y_pred): diff --git a/pythontoolkit/networks.py b/pythontoolkit/networks.py index 75cfdf6..11b316d 100644 --- a/pythontoolkit/networks.py +++ b/pythontoolkit/networks.py @@ -3,13 +3,13 @@ @author: claesnl """ - +import tensorflow as tf import warnings -from keras.models import Model -from keras.optimizers import Adam -from keras.layers import Conv3D, Conv3DTranspose, Dropout, Input -from keras.layers import Activation, BatchNormalization, concatenate -from keras import regularizers +from tf.keras.models import Model +from tf.keras.optimizers import Adam +from tf.keras.layers import Conv3D, Conv3DTranspose, Dropout, Input +from tf.keras.layers import Activation, BatchNormalization, concatenate +from tf.keras import regularizers warnings.filterwarnings('ignore') diff --git a/pythontoolkit/predict.py b/pythontoolkit/predict.py index b58487a..df1ad88 100644 --- a/pythontoolkit/predict.py +++ b/pythontoolkit/predict.py @@ -1,8 +1,9 @@ +import tensorflow as tf import warnings warnings.filterwarnings('ignore') import os import pickle -from keras.models import load_model, model_from_json +from tf.keras.models import load_model, model_from_json class CNN(): def __init__(self,model,config=None,custom_objects={}): diff --git a/pythontoolkit/train.py b/pythontoolkit/train.py index 791c3de..98d1b25 100644 --- a/pythontoolkit/train.py +++ b/pythontoolkit/train.py @@ -1,4 +1,5 @@ # Import python libraries: +import tensorflow as tf import warnings warnings.filterwarnings('ignore') import os @@ -6,10 +7,10 @@ from glob import glob from CAAI import networks import json -from keras.callbacks import ModelCheckpoint, TensorBoard -from keras.layers import Input -from keras.models import Model, load_model, model_from_json -from keras.optimizers import Adam +from tf.keras.callbacks import ModelCheckpoint, TensorBoard +from tf.keras.layers import Input +from tf.keras.models import Model, load_model, model_from_json +from tf.keras.optimizers import Adam # Define Convolutional Neural Network class. From 3cb5dc3b7f25ea46f34c146038a55a7a8c3d8f0e Mon Sep 17 00:00:00 2001 From: David Kovacs Date: Thu, 28 May 2020 16:05:16 +0200 Subject: [PATCH 8/8] Updated code to run on tensorflow 2.2 and cuda 10.1 --- pythontoolkit/DataAugmentation3D.py | 6 +-- pythontoolkit/losses.py | 3 +- pythontoolkit/networks.py | 18 +++++--- pythontoolkit/predict.py | 3 +- pythontoolkit/train.py | 66 ++++++++++++++++++++--------- scripts/data_generator.py | 15 ++----- scripts/main.py | 9 +++- 7 files changed, 76 insertions(+), 44 deletions(-) diff --git a/pythontoolkit/DataAugmentation3D.py b/pythontoolkit/DataAugmentation3D.py index ed3dddb..dda1092 100755 --- a/pythontoolkit/DataAugmentation3D.py +++ b/pythontoolkit/DataAugmentation3D.py @@ -6,11 +6,11 @@ data. Extended for 3D objects augmentation. """ import tensorflow as tf - -import tf.keras.backend as K +from tensorflow import keras +import tensorflow.keras.backend as K import numpy as np import scipy.ndimage -from tf.keras.utils.data_utils import Sequence +from tensorflow.keras.utils import Sequence from scipy import linalg from six.moves import range diff --git a/pythontoolkit/losses.py b/pythontoolkit/losses.py index d3a3a6d..2428d87 100644 --- a/pythontoolkit/losses.py +++ b/pythontoolkit/losses.py @@ -1,5 +1,6 @@ import tensorflow as tf -from tf.keras import backend as K +from tensorflow import keras +from tensorflow.keras import backend as K # Define root mean sqared error loss function. 
def rmse(y_true, y_pred): diff --git a/pythontoolkit/networks.py b/pythontoolkit/networks.py index 11b316d..791a20b 100644 --- a/pythontoolkit/networks.py +++ b/pythontoolkit/networks.py @@ -4,18 +4,24 @@ @author: claesnl """ import tensorflow as tf +from tensorflow import keras import warnings -from tf.keras.models import Model -from tf.keras.optimizers import Adam -from tf.keras.layers import Conv3D, Conv3DTranspose, Dropout, Input -from tf.keras.layers import Activation, BatchNormalization, concatenate -from tf.keras import regularizers +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.layers import Conv3D, Conv3DTranspose, Dropout, Input +from tensorflow.keras.layers import Activation, BatchNormalization, concatenate +from tensorflow.keras import regularizers warnings.filterwarnings('ignore') # Define u-net structure. def unet(X, f, dims_out): - + ''' + Inputs: + X inputs + f n_base_filters + dims_out output dimensions of network + ''' # Define convolution block: def conv_block(layer,fsize,dropout,downsample=True): for i in range(1,3): diff --git a/pythontoolkit/predict.py b/pythontoolkit/predict.py index df1ad88..f2d344b 100644 --- a/pythontoolkit/predict.py +++ b/pythontoolkit/predict.py @@ -1,9 +1,10 @@ import tensorflow as tf +from tensorflow import keras import warnings warnings.filterwarnings('ignore') import os import pickle -from tf.keras.models import load_model, model_from_json +from tensorflow.keras.models import load_model, model_from_json class CNN(): def __init__(self,model,config=None,custom_objects={}): diff --git a/pythontoolkit/train.py b/pythontoolkit/train.py index 98d1b25..a254c03 100644 --- a/pythontoolkit/train.py +++ b/pythontoolkit/train.py @@ -1,5 +1,6 @@ # Import python libraries: import tensorflow as tf +from tensorflow import keras import warnings warnings.filterwarnings('ignore') import os @@ -7,10 +8,10 @@ from glob import glob from CAAI import networks import json -from tf.keras.callbacks import ModelCheckpoint, TensorBoard -from tf.keras.layers import Input -from tf.keras.models import Model, load_model, model_from_json -from tf.keras.optimizers import Adam +from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard +from tensorflow.keras.layers import Input +from tensorflow.keras.models import Model, load_model, model_from_json +from tensorflow.keras.optimizers import Adam # Define Convolutional Neural Network class. 
@@ -74,17 +75,14 @@ def __init__(self,**kwargs): # Check if model has been trained (and can be overwritten), or if we should resume from checkpoint self.check_model_existance() - - # Setup callbacks - self.callbacks_list = self.setup_callbacks() def setup_callbacks(self): # Checkpoints os.makedirs('checkpoint/{}'.format(self.config['model_name']), exist_ok=True) - checkpoint_file=os.path.join('checkpoint',self.config["model_name"],'e{epoch:02d}_{val_loss:.2f}.h5') - checkpoint = ModelCheckpoint(checkpoint_file, monitor='val_loss', verbose=1, save_best_only=False, mode='min',period=self.config["checkpoint_save_rate"]) + checkpoint_file=os.path.join('checkpoint',self.config["model_name"],'e{epoch:02d}.h5') + checkpoint = ModelCheckpoint(checkpoint_file, monitor='val_loss', verbose=1, save_best_only=False, mode='min',save_freq=int(self.config['checkpoint_save_rate']*self.data_loader.n_batches)) # Tensorboard os.makedirs('logs', exist_ok=True) @@ -171,7 +169,7 @@ def generate_model_name_from_params(self): def get_initial_epoch_from_file(self,f): last_epoch = f.split('/')[-1].split('_')[0] assert last_epoch.startswith('e') # check that it is indeed the epoch part of the name that we extract - return int(last_epoch[1:]) # extract only integer part of eXXX + return int(last_epoch[1:-3]) # extract only integer part of eXXX def check_model_existance(self): @@ -206,17 +204,45 @@ def print_config(self): def set(self,key,value): self.config[key] = value + def plot_model(self): + # Compile network if it has not been done: + if not self.is_compiled: + self.compile_network() + + tf.keras.utils.plot_model(self.model, show_shapes=True, + to_file='model_fig.png') def train(self): + + # Setup callbacks + self.callbacks_list = self.setup_callbacks() # Compile network if it has not been done: if not self.is_compiled: self.compile_network() + print(self.model.summary()) + # Check if data generators has been attached if hasattr(self,'data_loader'): - self.training_generator = self.data_loader.generate( self.config['train_pts'] ) - self.validation_generator = self.data_loader.generate( self.config['valid_pts'] ) + #self.training_generator = self.data_loader.generate( self.config['train_pts'] ) + #self.validation_generator = self.data_loader.generate( self.config['valid_pts'] ) + + # Updated to TFv2 generator + generator_shape_input = self.config["input_patch_shape"]+tuple([self.config["input_channels"]]) + generator_shape_output = self.config["input_patch_shape"]+tuple([self.config["output_channels"]]) + self.training_generator = tf.data.Dataset.from_generator( + lambda: self.data_loader.generate( self.config['train_pts'] ), + output_types=(tf.float32,tf.float32), + output_shapes=(tf.TensorShape(generator_shape_input),tf.TensorShape(generator_shape_output))) + self.validation_generator = tf.data.Dataset.from_generator( + lambda: self.data_loader.generate( self.config['valid_pts'] ), + output_types=(tf.float32,tf.float32), + output_shapes=(tf.TensorShape(generator_shape_input),tf.TensorShape(generator_shape_output))) + + self.training_generator = self.training_generator.batch(self.config["batch_size"]) + self.validation_generator = self.validation_generator.batch(self.config["batch_size"]) + else: print("No data generator was attached.") exit(-1) @@ -226,14 +252,14 @@ def train(self): with open('configs/{}.pkl'.format(self.config["model_name"]), 'wb') as file_pi: pickle.dump(self.config, file_pi) - history = self.model.fit_generator(generator = self.training_generator, - steps_per_epoch = 
self.data_loader.n_batches, - validation_data = self.validation_generator, - validation_steps = 1, - epochs = self.config['epochs'], - verbose = 1, - callbacks = self.callbacks_list, - initial_epoch = self.config['initial_epoch'] ) + history = self.model.fit( self.training_generator, + steps_per_epoch = self.data_loader.n_batches, + validation_data = self.validation_generator, + validation_steps = 1, + epochs = self.config['epochs'], + verbose = 1, + callbacks = self.callbacks_list, + initial_epoch = self.config['initial_epoch'] ) # Save model self.model.save('{}.h5'.format( self.config['model_name'] )) diff --git a/scripts/data_generator.py b/scripts/data_generator.py index af6b39d..451efbf 100644 --- a/scripts/data_generator.py +++ b/scripts/data_generator.py @@ -7,7 +7,6 @@ class DataGenerator(): def __init__(self, config): - self.batch_size = config['batch_size'] self.img_res = config['input_patch_shape'] self.input_channels = config['input_channels'] self.output_channels = config['output_channels'] @@ -23,7 +22,7 @@ def __init__(self, config): self.tgt_name = 'minc/res_256_truex1_256_CT.npy' self.n_batches = len(self.summary['train']) if 'train' in self.summary else len(self.summary['train_0']) - self.n_batches /= self.batch_size + self.n_batches /= config['batch_size'] def generate(self, train_or_test): while 1: @@ -31,15 +30,9 @@ def generate(self, train_or_test): yield X, y def __data_generation(self, train_or_test): - X = np.empty( (self.batch_size,) + self.img_res + (self.input_channels,) ) - y = np.empty( (self.batch_size,) + self.img_res + (self.output_channels,) ) - - for i in range(self.batch_size): - - dat,tgt = self.load(train_or_test,load_mode='memmap') - - X[i,...] = dat - y[i,...] = tgt.reshape(self.img_res + (self.output_channels,)) + X,y = self.load(train_or_test,load_mode='memmap') + X = X.reshape(self.img_res + (self.input_channels,)) + y = y.reshape(self.img_res + (self.output_channels,)) if train_or_test.startswith('train') and self.augmentation: X, y = self.data_augmentation.random_transform_batch(X,y) diff --git a/scripts/main.py b/scripts/main.py index 1d07538..cfbfa53 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -6,18 +6,21 @@ from data_generator import DataGenerator import pyminc.volumes.factory as pyminc +#change path to your data folder data_path = '/homes/kovacs/project_data/lymphoma-auto-contouring/' + # Define training function: def train_v1(): # Parameter definition for the network. These must be changed to fit your own data. Other parameters exist. See train.py - cnn = CNN(model_name='v2_test_david', + cnn = CNN(model_name='v7_tf==2.2', input_patch_shape=(256,256,8), # Dimensions of model input in the form (x,y,z) + overwrite = True, input_channels=2, # Number of model input channels. In this case, PET in 1st channel, CT in 2nd channel. output_channels=1, # Number of model output channels. In this case, a PET image as output. batch_size=1, # Number of inputs used in batch normalization. 1 = no batch normalization. - epochs=100, # Number of training epochs. A stopping critera can also be implemented. + epochs=3, # Number of training epochs. A stopping critera can also be implemented. learning_rate=1e-4, # Set learning rate. checkpoint_save_rate=50, # Saves model each # epoch. 
         loss_functions=[['mean_absolute_error',1]], # Define loss function
@@ -29,7 +32,9 @@ def train_v1():
 
     # Attach generator
     cnn.data_loader = DataGenerator(cnn.config) # Load data generator
 
+    #printing model
     cnn.print_config() # Print network configurations
+    #cnn.plot_model()
 
     final_model_name = cnn.train() # Create training network
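
The key change in PATCH 8/8 is the move from the deprecated Keras fit_generator() call to model.fit() on a tf.data.Dataset that wraps the plain Python generator. Below is a minimal sketch of that pattern in isolation; the shapes, channel counts, and dummy_generator are illustrative stand-ins (not part of the patched code), with DataGenerator.generate() being what the real pipeline wraps.

    import numpy as np
    import tensorflow as tf

    patch_shape = (256, 256, 8)   # (x, y, z), matching input_patch_shape in scripts/main.py
    in_ch, out_ch = 2, 1          # PET+CT input channels, single PET output channel

    def dummy_generator():
        # Stand-in for DataGenerator.generate(); yields (input, target) volume pairs.
        while True:
            yield (np.zeros(patch_shape + (in_ch,), dtype=np.float32),
                   np.zeros(patch_shape + (out_ch,), dtype=np.float32))

    dataset = tf.data.Dataset.from_generator(
        dummy_generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape(patch_shape + (in_ch,)),
                       tf.TensorShape(patch_shape + (out_ch,))))
    dataset = dataset.batch(1)    # batch_size=1, as configured in train_v1()

    # model.fit(dataset, steps_per_epoch=..., epochs=...) then replaces the old
    # fit_generator() call, as done in pythontoolkit/train.py in this patch.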