diff --git a/args_helper.py b/args_helper.py index 94370efa..3f37f90a 100644 --- a/args_helper.py +++ b/args_helper.py @@ -297,6 +297,12 @@ def parse_arguments(self, jupyter_mode=False): default=False, help="Boolean flag to indicate whether to use bias" ) + parser.add_argument( + "--bias_fc", #_lastlayer", + action="store_true", + default=False, + help="Boolean flag to indicate whether to use bias at the last fc layers" + ) parser.add_argument( "--freeze-weights", action="store_true", @@ -878,6 +884,18 @@ def parse_arguments(self, jupyter_mode=False): default=0, help="Use mixed precision or not" ) + parser.add_argument( + "--transfer_learning", + type=int, + default=0, + help="Use transfer learning or not" + ) + parser.add_argument( + "--uv_decomp", + type=int, + default=0, + help="Use W=UV decomposition or not" + ) parser.add_argument('--transformer_emsize', type=int, default=200, help='size of word embeddings') parser.add_argument('--transformer_nhid', type=int, default=200, @@ -893,6 +911,7 @@ def parse_arguments(self, jupyter_mode=False): parser.add_argument('--transformer_nhead', type=int, default=2, help='the number of heads in the encoder/decoder of the transformer model') + parser.add_argument( "--only-sanity", action="store_true", diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh new file mode 100755 index 00000000..9db28e48 --- /dev/null +++ b/caltech_exec_GD.sh @@ -0,0 +1,62 @@ +# ResNet50_tf (changed the output size) + +# Weight training +python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 +#python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 + + + +# 1FC +:< log_caltech_hc_sparsity_50_1FC 2>&1 +python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5_1FC.yml > log_caltech_hc_sparsity_5_1FC 2>&1 +python main.py --config 
configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2_1FC.yml > log_caltech_hc_sparsity_2_1FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_50_1FC.yml > log_caltech_ep_sparsity_50_1FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_5_1FC.yml > log_caltech_ep_sparsity_5_1FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_2_1FC.yml > log_caltech_ep_sparsity_2_1FC 2>&1 +BLOCK + + +# 2FC +:< log_caltech_hc_sparsity_50_2FC 2>&1 +python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5_2FC.yml > log_caltech_hc_sparsity_5_2FC 2>&1 +python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2_2FC.yml > log_caltech_hc_sparsity_2_2FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_50_2FC.yml > log_caltech_ep_sparsity_50_2FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_5_2FC.yml > log_caltech_ep_sparsity_5_2FC 2>&1 +python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_2_2FC.yml > log_caltech_ep_sparsity_2_2FC 2>&1 +BLOCK + + + + + + + +# ======== OLD + + + + + + + +# HC +#:< log_caltech_hc_sparsity_50_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_unconstrained.yml > log_caltech_hc_sparsity_unconstrained_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5.yml > log_caltech_hc_sparsity_5_2lam6_UV 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2.yml > log_caltech_hc_sparsity_2_5lam6_UV 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5.yml > 
log_caltech_hc_sparsity_0_5_1_5lam5_UV 2>&1 +#BLOCK +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5.yml > log_caltech_hc_sparsity_0_5_2lam5_50epoch_mutli 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5_1lam5.yml > log_caltech_hc_sparsity_0_5_1lam5_50epoch_mutli 2>&1 + +# EP +#:< log_caltech_ep_sparsity_50_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_5.yml > log_caltech_ep_sparsity_5_UV 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_2.yml > log_caltech_ep_sparsity_2_UV 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_0_5.yml > log_caltech_ep_sparsity_0_5_UV 2>&1 +#BLOCK + diff --git a/configs/sanity/caltech_sanity.yml b/configs/sanity/caltech_sanity.yml new file mode 100644 index 00000000..28fcf402 --- /dev/null +++ b/configs/sanity/caltech_sanity.yml @@ -0,0 +1,72 @@ +subfolder: resnet50_caltech_1FC_sanity + +# algorithm +algo: 'hc_iter' +iter_period: 5 + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_HC +transfer_learning: True + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.001 +lr_policy: cosine_lr #cosine_lr #constant_lr +fine_tune_optimizer: adam +fine_tune_lr: 0.0001 +fine_tune_lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_type: BottomK +#target_sparsity: 2 +# decide if you want to "unflag" +unflag_before_finetune: True +init: signed_constant +score_init: unif #skew #half #bimodal #skew # bern +scale_fan: False + +# ===== Rounding ===== # +round: naive +noise: True +noise_ratio: 0 + 
+# ===== Quantization ===== # +hc_quantized: True +quantize_threshold: 0.5 + +# ===== Regularization ===== # +regularization: L2 +#lmbda: 0.000005 + + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_sanity_checks: True +only_sanity: True +sanity_folder: results/resnet50_caltech_HC_sparsity_50_1FC/ + +#results/resnet50_caltech_HC_sparsity_5_1FC/ +#results/resnet50_caltech_HC_sparsity_2_1FC/ # AWS-4 + diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml new file mode 100644 index 00000000..b12a08f6 --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -0,0 +1,51 @@ +subfolder: resnet50_caltech_weight_training +trial_num: 1 +score_init: 'all_one' +bias_lastlayer: True + +# algorithm +algo: 'hc_iter' +weight_training: True +transfer_learning: True + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: constant_lr #multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 5 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/configs/training/resnet50/caltech_resnet50_training_1FC.yml b/configs/training/resnet50/caltech_resnet50_training_1FC.yml new file mode 100644 index 00000000..262d2fc3 --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training_1FC.yml @@ -0,0 +1,51 @@ +subfolder: 
resnet50_caltech_weight_training_1FC +trial_num: 1 +score_init: 'all_one' +#bias_fc: True + +# algorithm +algo: 'hc_iter' +weight_training: True +transfer_learning: True + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/configs/training/resnet50/caltech_resnet50_training_2FC.yml b/configs/training/resnet50/caltech_resnet50_training_2FC.yml new file mode 100644 index 00000000..f2d99260 --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training_2FC.yml @@ -0,0 +1,52 @@ +subfolder: resnet50_caltech_weight_training_2FC +trial_num: 1 +score_init: 'all_one' +#bias_fc: True +uv_decomp: True + +# algorithm +algo: 'hc_iter' +weight_training: True +transfer_learning: True + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: 
False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/data/__init__.py b/data/__init__.py index 68298408..bbaf3888 100644 --- a/data/__init__.py +++ b/data/__init__.py @@ -4,3 +4,4 @@ from data.tinyimagenet import TinyImageNet from data.mnist import MNIST from data.bigcifar import BigCIFAR10 +from data.caltech101 import Caltech101 \ No newline at end of file diff --git a/data/caltech101.py b/data/caltech101.py new file mode 100644 index 00000000..c66e7bfa --- /dev/null +++ b/data/caltech101.py @@ -0,0 +1,108 @@ + +import joblib +import cv2 +import os +import time +import random +import numpy as np + +from imutils import paths +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split + +# Load torch...!!! +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader + +# Load torchvision ...!!! +from torchvision import transforms + + +from args_helper import parser_args + + +class Caltech101: + def __init__(self, args): + super(Caltech101, self).__init__() + + + image_paths = list(paths.list_images('test_caltech101/101_ObjectCategories')) + + data = [] + labels = [] + for img_path in image_paths: + label = img_path.split(os.path.sep)[-2] + if label == "BACKGROUND_Google": + continue + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + data.append(img) + labels.append(label) + + data = np.array(data) + labels = np.array(labels) + + + lb = LabelEncoder() + labels = lb.fit_transform(labels) + print(f"Total Number of Classes: {len(lb.classes_)}") + + train_transforms = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), + ]) + + val_transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + 
transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), + ]) + + # divide the data into train, validation, and test set + (X, x_test , Y, y_test) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) + (x_train, x_val, y_train, y_val) = train_test_split(X, Y, test_size=0.25, random_state=42) + #(X, x_val , Y, y_val) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) + #(x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.25, random_state=42) + print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}") + + + train_data = CustomDataset(x_train, y_train, train_transforms) + val_data = CustomDataset(x_val, y_val, val_transform) + test_data = CustomDataset(x_test, y_test, val_transform) + + self.train_loader = DataLoader(train_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + self.val_loader = DataLoader(test_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + #self.val_loader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + self.num_classes = len(lb.classes_) + + #valLoader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + #testLoader = DataLoader(test_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + + +# custom dataset class +class CustomDataset(Dataset): + def __init__(self, images, labels= None, transforms = None): + self.labels = labels + self.images = images + self.transforms = transforms + + def __len__(self): + return len(self.images) + + def __getitem__(self, index): + data = self.images[index][:] + + if self.transforms: + data = self.transforms(data) + + if self.labels is not None: + return (data, self.labels[index]) + else: + return data diff --git a/main.py b/main.py index cc1af83e..79f7c5bf 100644 --- a/main.py +++ b/main.py @@ -45,10 +45,14 @@ def 
main_worker(gpu, ngpus_per_node): result_root = result_subroot + '/results_' + idty_str + '/' else: result_root = 'results/results_' + idty_str + '/' - + data = get_dataset(parser_args) if not os.path.isdir(result_root): os.mkdir(result_root) - model = get_model(parser_args) + if parser_args.transfer_learning: + model = get_model(parser_args, data.num_classes) + model = load_pretrained_imagenet(model, data.val_loader) + else: + model = get_model(parser_args) print_model(model, parser_args) if parser_args.weight_training: @@ -65,7 +69,6 @@ def main_worker(gpu, ngpus_per_node): else: model2 = None optimizer = get_optimizer(parser_args, model) - data = get_dataset(parser_args) scheduler = get_scheduler(optimizer, parser_args.lr_policy) #lr_policy = get_policy(parser_args.lr_policy)(optimizer, parser_args) if parser_args.label_smoothing is None: diff --git a/main_utils.py b/main_utils.py index 7f54a7cd..a896ac1c 100644 --- a/main_utils.py +++ b/main_utils.py @@ -38,6 +38,7 @@ redraw, get_layers, get_prune_rate, + load_pretrained_imagenet ) from utils.schedulers import get_scheduler from utils.utils import set_seed, plot_histogram_scores @@ -381,6 +382,7 @@ def finetune(model, parser_args, data, criterion, old_epoch_list, old_test_acc_b def get_idty_str(parser_args): train_mode_str = 'weight_training' if parser_args.weight_training else 'pruning' + epoch_str = parser_args.epochs dataset_str = parser_args.dataset model_str = parser_args.arch algo_str = parser_args.algo @@ -402,8 +404,8 @@ def get_idty_str(parser_args): run_idx_str = parser_args.run_idx lam_ft_str = parser_args.lam_finetune_loss n_step_ft_str = parser_args.num_step_finetune - idty_str = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_finetune_{}_MAML_{}_{}_fan_{}_{}_{}_width_{}_seed_{}_idx_{}".\ - format(train_mode_str, dataset_str, model_str, algo_str, rate_str, period_str, reg_str, reg_lmbda, + idty_str = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_finetune_{}_MAML_{}_{}_fan_{}_{}_{}_width_{}_seed_{}_idx_{}".\ + 
format(train_mode_str, epoch_str, dataset_str, model_str, algo_str, rate_str, period_str, reg_str, reg_lmbda, opt_str, policy_str, lr_str, lr_gamma, lr_adj, finetune_lr_str, lam_ft_str, n_step_ft_str, fan_str, w_str, s_str, width_str, seed_str, run_idx_str).replace(".", "_") @@ -966,17 +968,20 @@ def get_dataset(parser_args): return dataset -def get_model(parser_args): +def get_model(parser_args, num_classes=-1): if parser_args.first_layer_dense: parser_args.first_layer_type = "DenseConv" print("=> Creating model '{}'".format(parser_args.arch)) if parser_args.fixed_init: set_seed(parser_args.seed_fixed_init) - if parser_args.arch in ['Conv4', 'Conv4Normal']: - model = models.__dict__[parser_args.arch](width=parser_args.width) + if parser_args.transfer_learning: + model = models.__dict__[parser_args.arch](num_classes=num_classes) else: - model = models.__dict__[parser_args.arch]() + if parser_args.arch in ['Conv4', 'Conv4Normal']: + model = models.__dict__[parser_args.arch](width=parser_args.width) + else: + model = models.__dict__[parser_args.arch]() if parser_args.fixed_init: set_seed(parser_args.seed) diff --git a/models/resnet.py b/models/resnet.py index 2c642134..952ae017 100644 --- a/models/resnet.py +++ b/models/resnet.py @@ -1,4 +1,6 @@ import torch.nn as nn +import torch.nn.functional as F +import pdb from args_helper import parser_args from utils.builder import get_builder @@ -121,7 +123,34 @@ def __init__(self, builder, block, layers, num_classes=1000, base_width=64): if parser_args.last_layer_dense: self.fc = nn.Conv2d(512 * block.expansion, num_classes, 1) else: - self.fc = builder.conv1x1(512 * block.expansion, num_classes) + if parser_args.transfer_learning: + if parser_args.uv_decomp: + dim_size = 1000 + else: + dim_size = num_classes + else: + dim_size = num_classes + + if parser_args.bias_fc: + tmp = parser_args.bias + parser_args.bias = True + self.fc = builder.conv1x1(512 * block.expansion, dim_size) + parser_args.bias = tmp + else: + self.fc = 
builder.conv1x1(512 * block.expansion, dim_size) + + if parser_args.transfer_learning: + self.dropout = nn.Dropout2d(0.4) + if parser_args.uv_decomp: + if parser_args.bias_fc: + tmp = parser_args.bias + parser_args.bias = True + self.fc2 = builder.conv1x1(dim_size, num_classes) + parser_args.bias = tmp + else: + self.fc2 = builder.conv1x1(dim_size, num_classes) + + self.prunable_layer_names, self.prunable_biases = self.get_prunable_param_names() @@ -158,7 +187,7 @@ def get_prunable_param_names(model): return prunable_weights, prunable_biases - def forward(self, x): + def forward(self, x, hidden=False): # update score thresholds for global ep if parser_args.algo in ['global_ep', 'global_ep_iter'] or parser_args.bottom_k_on_forward: prune(self, update_thresholds_only=True) @@ -174,24 +203,36 @@ def forward(self, x): x = self.layer3(x) x = self.layer4(x) - x = self.avgpool(x) + if hidden: + return x + + if parser_args.transfer_learning: + x = F.adaptive_avg_pool2d(x, 1).reshape(x.size(0), -1) + x = self.dropout(x) + x = x.view(x.size(0), -1, 1, 1) + else: + x = self.avgpool(x) + x = self.fc(x) + if parser_args.transfer_learning and parser_args.uv_decomp: + x = self.fc2(x) + x = x.view(x.size(0), -1) return x # ResNet }}} -def ResNet18(pretrained=False): - return ResNet(get_builder(), BasicBlock, [2, 2, 2, 2], 1000) +def ResNet18(pretrained=False, num_classes=1000): + return ResNet(get_builder(), BasicBlock, [2, 2, 2, 2], num_classes=num_classes) -def ResNet50(pretrained=False): - return ResNet(get_builder(), Bottleneck, [3, 4, 6, 3], 1000) +def ResNet50(pretrained=False, num_classes=1000): + return ResNet(get_builder(), Bottleneck, [3, 4, 6, 3], num_classes=num_classes) -def ResNet101(pretrained=False): - return ResNet(get_builder(), Bottleneck, [3, 4, 23, 3], 200) +def ResNet101(pretrained=False, num_classes=200): # default: tinyImagenet + return ResNet(get_builder(), Bottleneck, [3, 4, 23, 3], num_classes=num_classes) #return ResNet(get_builder(), Bottleneck, [3, 4, 
23, 3], 1000) diff --git a/sanity_check_GD.sh b/sanity_check_GD.sh index c55442be..d0d96ddf 100644 --- a/sanity_check_GD.sh +++ b/sanity_check_GD.sh @@ -19,8 +19,19 @@ #python main.py --config configs/sanity/mobilenet_sanity.yml > log_sanity_mobilenet_sparsity_5 2>&1 #python main.py --config configs/sanity/mobilenet_sanity.yml > log_sanity_mobilenet_sparsity_1_4 2>&1 -python main.py --config configs/sanity/resnet18_sanity.yml > log_sanity_resnet18_sparsity_5 2>&1 +#python main.py --config configs/sanity/resnet18_sanity.yml > log_sanity_resnet18_sparsity_5 2>&1 +# Caltech +# 2% sparsity +#python main.py --config configs/sanity/caltech_sanity.yml > log_sanity_caltech_sparsity_2 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2_1FC_invert.yml > log_invert_caltech_sparsity_2 2>&1 +# 5% sparsity +#python main.py --config configs/sanity/caltech_sanity.yml > log_sanity_caltech_sparsity_5 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5_1FC_invert.yml > log_invert_caltech_sparsity_5 2>&1 + +# 50% sparsity +python main.py --config configs/sanity/caltech_sanity.yml > log_sanity_caltech_sparsity_50 2>&1 +python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_50_1FC_invert.yml > log_invert_caltech_sparsity_50 2>&1 diff --git a/test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp b/test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp new file mode 100644 index 00000000..96905fe5 Binary files /dev/null and b/test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp differ diff --git a/test_caltech101/train.py b/test_caltech101/train.py new file mode 100644 index 00000000..d20694e7 --- /dev/null +++ b/test_caltech101/train.py @@ -0,0 +1,239 @@ +import joblib +import cv2 +import os +import time +import random +import pretrainedmodels +import numpy as np + +from imutils import paths +from sklearn.preprocessing import 
def seed_everything(SEED=42, deterministic=False):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        SEED: Integer seed applied to ``random``, ``numpy.random`` and torch
            (CPU and all CUDA devices).
        deterministic: When True, ask cuDNN for deterministic kernels and turn
            its autotuner off so that seeded GPU runs can be repeated. When
            False (default, the historical behaviour) the autotuner is enabled,
            which is faster for fixed-size inputs but may pick different
            kernels from run to run.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # manual_seed_all seeds every CUDA device, including the current one
    torch.cuda.manual_seed_all(SEED)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    else:
        torch.backends.cudnn.benchmark = True # keep True if all the input have same size.
# custom dataset class
class CustomDataset(Dataset):
    """Dataset over an indexable collection of images with optional labels.

    Args:
        images: Indexable collection; each item must itself support ``[:]``.
        labels: Optional collection aligned with ``images``.
        transforms: Optional callable applied to every fetched image.
    """

    def __init__(self, images, labels= None, transforms = None):
        self.images, self.labels, self.transforms = images, labels, transforms

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, label)`` when labels exist, otherwise just the image."""
        sample = self.images[index][:]
        sample = self.transforms(sample) if self.transforms else sample

        if self.labels is None:
            return sample
        return (sample, self.labels[index])
# training function
def train(model, trainLoader):
    """Train ``model`` for one epoch over ``trainLoader``.

    Uses the module-level ``device``, ``optimizer`` and ``criterion``.

    Returns:
        (loss, accuracy): mean per-sample loss and accuracy in percent, both
        computed over ``len(trainLoader.dataset)`` samples.
    """
    model.train()
    running_loss = 0.0
    running_correct = 0
    for batch_idx, batch in enumerate(trainLoader):
        inputs, target = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        # criterion yields the batch mean; weight it by the batch size so the
        # division by the dataset size below gives a true per-sample average
        running_loss += loss.item() * target.size(0)
        running_correct += (outputs.argmax(dim=1) == target).sum().item()
        loss.backward()
        optimizer.step()
        if batch_idx % 40 == 0:
            print("batch-[{}/{}] Loss: {}".format(batch_idx, len(trainLoader), loss.item()))

    loss = running_loss/len(trainLoader.dataset)
    accuracy = 100. * running_correct/len(trainLoader.dataset)

    print(f"Train Loss: {loss:.4f}, Train Acc: {accuracy:.2f}")
    return loss, accuracy

#validation function
def validate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Uses the module-level ``device`` and ``criterion`` and leaves the model in
    eval mode.

    Returns:
        (loss, accuracy): mean per-sample loss and accuracy in percent.
    """
    print('Validating')
    model.eval()
    running_loss = 0.0
    running_correct = 0
    with torch.no_grad():
        for batch in dataloader:
            inputs, target = batch[0].to(device), batch[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, target)

            # same batch-size weighting as in train() so both losses share a scale
            running_loss += loss.item() * target.size(0)
            running_correct += (outputs.argmax(dim=1) == target).sum().item()

    loss = running_loss/len(dataloader.dataset)
    accuracy = 100. * running_correct/len(dataloader.dataset)
    print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}')

    return loss, accuracy

def test(model, dataloader):
    """Count correct predictions of ``model`` over ``dataloader``.

    Targets are class indices (as in ``train``/``validate``), so predictions
    are compared against them directly. Uses the module-level ``device`` and
    switches the model to eval mode.

    Returns:
        (correct, total): number of correct predictions and of samples seen.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            inputs, target = batch[0].to(device), batch[1].to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    return correct, total
def get_bn_layers(arch='ResNet50', model=None):
    """Return the batch-norm modules of a bottleneck ResNet in forward order.

    The result starts with the stem ``model.bn1`` and is followed by
    ``bn1``, ``bn2``, ``bn3`` of every block in ``layer1`` .. ``layer4``.

    Args:
        arch: Architecture name; ``'ResNet50'`` and ``'ResNet101'`` (both built
            from bottleneck blocks) are supported.
        model: The network, optionally wrapped in ``DataParallel`` or
            ``DistributedDataParallel``.

    Raises:
        NotImplementedError: for any other ``arch`` (previously this surfaced
            as an ``UnboundLocalError`` on the return statement).
    """
    if isinstance(model, (nn.parallel.DistributedDataParallel, nn.DataParallel)):
        model = model.module

    if arch not in ('ResNet50', 'ResNet101'):
        raise NotImplementedError(
            "get_bn_layers does not support arch '{}'".format(arch))

    bn_layers = [model.bn1]
    for stage in (model.layer1, model.layer2, model.layer3, model.layer4):
        for block in stage:
            bn_layers.extend((block.bn1, block.bn2, block.bn3))

    return bn_layers
def load_pretrained_imagenet(model, dataloader):
    """Copy ImageNet-pretrained ResNet-50 weights into ``model`` and sanity-check them.

    The backbone wrapped by ``imagenet_ResNet50`` is used as the source. Its
    state dict is loaded with ``strict=False``, so only parameters whose names
    exist in both networks are copied; the keys that did not line up are
    printed. The wrapper's task head (``pretrained.l0``) is not copied.

    Afterwards both networks are run on one random batch and their hidden
    features (``hidden=True``) are compared. The check runs in eval mode and
    under ``no_grad`` so that it neither builds an autograd graph nor lets the
    random batch overwrite the just-loaded BatchNorm running statistics.

    Args:
        model: Target network; must accept ``forward(x, hidden=True)``.
        dataloader: Unused; kept for interface compatibility.

    Returns:
        ``model``, moved to the GPU, in the same train/eval mode it came in.
    """
    pretrained = imagenet_ResNet50(pretrained=True).cuda()
    model_s = pretrained.model  # source model

    # Load directly from memory instead of round-tripping through a fixed file
    # in the working directory (which parallel runs would overwrite).
    load_result = model.load_state_dict(model_s.state_dict(), strict=False)
    print('Keys not found in pretrained model: ', load_result.missing_keys)
    print('Pretrained keys not used: ', load_result.unexpected_keys)

    # test the consistency of model and pretrained model
    model = model.cuda()
    was_training = model.training
    model.eval()
    pretrained.eval()
    with torch.no_grad():
        x = torch.rand(16,3,224,224).cuda() # random dataset
        z1 = model.forward(x, hidden=True)
        z2 = pretrained(x, hidden=True)
    max_diff = (z1 - z2).abs().max().item()
    # tolerance-based comparison: bitwise equality is too strict for floats
    print('Compare hidden feature: ', torch.allclose(z1, z2, atol=1e-5),
          '(max abs diff: {:.3e})'.format(max_diff))
    model.train(was_training)

    return model
F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = self.dropout(x) + l0 = self.l0(x) + return l0 + + +''' +def test_and_load_pretrained_imagenet(model, dataloader): + model = model.cuda() + x = torch.rand(16,3,224,224).cuda() # random dataset + z1 = model.forward(x, hidden=True) + + # load pytorch pretrained model (imagenet) + # imagenet_model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') + # imagenet_model = imagenet_model.cuda() + imagenet_model = imagenet_ResNet50(pretrained=True).cuda() + #z2 = imagenet_model.forward(x, hidden=True) + + # check initial model + print('our initial model on transfer task') + val_loss, val_accuracy = validate(model, dataloader) + + print('pretrained model on transfer task') + val_loss, val_accuracy = validate(imagenet_model, dataloader) + + # copy weights from imagenet_model to model + #import pdb; pdb.set_trace() + conv, lin = get_layers('ResNet50', model) + layers = [*conv, *lin] + conv2, lin2 = get_layers('ResNet50', imagenet_model.model) + layers2 = [*conv2, *lin2] + + for target_layer, source_layer in zip(layers, layers2): + if source_layer is None: + continue + #print(target_layer, source_layer) + assert(target_layer.weight.data.shape == source_layer.weight.data.shape) + target_layer.weight.data = source_layer.weight.data + + bn = get_bn_layers('ResNet50', model) + bn2 = get_bn_layers('ResNet50', imagenet_model.model) + + for target_layer, source_layer in zip(bn, bn2): + assert(target_layer.weight.data.shape == source_layer.weight.data.shape) + target_layer.weight.data = source_layer.weight.data + target_layer.bias.data = source_layer.bias.data + z3 = model.forward(x, hidden=True) + + # check updated model + print('our updated model on transfer task') + val_loss, val_accuracy = validate(model, dataloader) + + print((z1 == z2).all()) + print((z3 == z2).all()) + print((z3 == z2).all()) + import pdb; pdb.set_trace() + + return model +''' + +#validation function +def validate(model, dataloader): + device = 
torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # GPU + criterion = nn.CrossEntropyLoss() + model = model.to(device) + + print('Validating') + model.eval() + running_loss = 0.0 + running_correct = 0 + with torch.no_grad(): + for i, data in enumerate(dataloader): + data, target = data[0].to(device), data[1].to(device) + outputs = model(data) + #loss = criterion(outputs, torch.max(target, 1)[1]) + loss = criterion(outputs, target) + + running_loss += loss.item() + _, preds = torch.max(outputs.data, 1) + #running_correct += (preds == torch.max(target, 1)[1]).sum().item() + running_correct += (preds == target).sum().item() + + loss = running_loss/len(dataloader.dataset) + accuracy = 100. * running_correct/len(dataloader.dataset) + print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}') + + return loss, accuracy +