From 9190205783e04653d827fb45ae092dc096fc640e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 16 Jan 2022 21:58:09 +0000 Subject: [PATCH 01/14] caltech101 transfer learning start --- caltech_exec_GD.sh | 12 + .../resnet50/caltech_resnet50_training.yml | 39 +++ test_caltech101/train.py | 239 ++++++++++++++++++ 3 files changed, 290 insertions(+) create mode 100755 caltech_exec_GD.sh create mode 100644 configs/training/resnet50/caltech_resnet50_training.yml create mode 100644 test_caltech101/train.py diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh new file mode 100755 index 00000000..b123e9c4 --- /dev/null +++ b/caltech_exec_GD.sh @@ -0,0 +1,12 @@ +# ResNet50_tf (changed the output size) + +# Weight training +python main.py --config configs/training/resnet50/caltech_resnet50_training.yml + +# HC +#python main.py --config configs/hypercube/tinyImageNet/resnet18/resnet18_sparsity_1_4_adam_9lam6.yml > log_tiny_hc_sparsity_1_4_adam_9lam6 2>&1 + +# EP +#python main.py --config configs/ep/tinyImageNet/resnet18/sparsity_0_75.yml > log_tiny_ep_sparsity_0_75 2>&1 + + diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml new file mode 100644 index 00000000..7977a609 --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -0,0 +1,39 @@ +subfolder: resnet50_caltech_weight_training +trial_num: 1 + + +# algorithm +algo: 'training' + +# Architecture +arch: resnet50 + +# ===== Dataset ===== # +dataset: caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: constant_lr #multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 5 +wd: 0 +momentum: 0.9 +batch_size: 16 + +weight_training: True + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + diff --git a/test_caltech101/train.py b/test_caltech101/train.py new file mode 100644 index 00000000..d20694e7 --- /dev/null +++ b/test_caltech101/train.py @@ -0,0 +1,239 @@ +import joblib +import cv2 +import os +import time +import random +import pretrainedmodels +import numpy as np + +from imutils import paths +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split + +# Load torch...!!! +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader + +# Load torchvision ...!!! +from torchvision import transforms + +'''SEED Everything''' +def seed_everything(SEED=42): + random.seed(SEED) + np.random.seed(SEED) + torch.manual_seed(SEED) + torch.cuda.manual_seed(SEED) + torch.cuda.manual_seed_all(SEED) + torch.backends.cudnn.benchmark = True # keep True if all the input have same size. +SEED=42 +seed_everything(SEED=SEED) +'''SEED Everything''' + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # GPU +epochs = 5 # Number of epochs +BS = 16 # Batch size + + +image_paths = list(paths.list_images('./101_ObjectCategories')) + +data = [] +labels = [] +for img_path in image_paths: + label = img_path.split(os.path.sep)[-2] + if label == "BACKGROUND_Google": + continue + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + data.append(img) + labels.append(label) + +data = np.array(data) +labels = np.array(labels) + + +lb = LabelEncoder() +labels = lb.fit_transform(labels) +print(f"Total Number of Classes: {len(lb.classes_)}") + +train_transforms = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), +]) + +val_transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), +]) + +# divide the data into train, validation, and test set +(X, x_val , Y, y_val) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) +(x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.25, random_state=42) +print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}") + + +# custom dataset class +class CustomDataset(Dataset): + def __init__(self, images, labels= None, transforms = None): + self.labels = labels + self.images = images + self.transforms = transforms + + def __len__(self): + return len(self.images) + + def __getitem__(self, index): + data = self.images[index][:] + + if self.transforms: + data = self.transforms(data) + + if self.labels is not None: + return (data, self.labels[index]) + else: + return data + +train_data = CustomDataset(x_train, y_train, train_transforms) +val_data = CustomDataset(x_val, y_val, val_transform) +test_data = CustomDataset(x_test, y_test, val_transform) + +trainLoader = DataLoader(train_data, batch_size=BS, shuffle=True, num_workers=4) +valLoader = DataLoader(val_data, batch_size=BS, shuffle=True, num_workers=4) +testLoader = DataLoader(test_data, batch_size=BS, shuffle=True, num_workers=4) + + +class ResNet34(nn.Module): + def __init__(self, pretrained): + super(ResNet34, self).__init__() + if pretrained is True: + self.model = pretrainedmodels.__dict__['resnet34'](pretrained='imagenet') + else: + self.model = pretrainedmodels.__dict__['resnet34'](pretrained = None) + # change the classification layer + self.l0= nn.Linear(512, len(lb.classes_)) + self.dropout = nn.Dropout2d(0.4) + + def forward(self, x): + # get the batch size only, ignore(c, h, w) + batch, _, _, _ = x.size() + x = self.model.features(x) + x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = self.dropout(x) + l0 = self.l0(x) + return l0 + +class ResNet50(nn.Module): + def __init__(self, pretrained): + super(ResNet50, self).__init__() + if pretrained is True: + self.model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') + else: + self.model = pretrainedmodels.__dict__['resnet50'](pretrained = None) + # change the classification layer + self.l0= nn.Linear(2048, len(lb.classes_)) + self.dropout = nn.Dropout2d(0.4) + + def forward(self, x): + # get the batch size only, ignore(c, h, w) + batch, _, _, _ = x.size() + x = self.model.features(x) + x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = self.dropout(x) + l0 = self.l0(x) + return l0 + +#model = ResNet34(pretrained=True).to(device) +model = ResNet50(pretrained=True).to(device) +print(model) + +# loss function +criterion = nn.CrossEntropyLoss() + +# optimizer +optimizer = optim.Adam(model.parameters(), lr = 1e-4) + +# training function +#train_loss , train_accuracy = [], [] +def train(model, trainLoader): + model.train() + running_loss = 0.0 + running_correct = 0 + for batch_idx, data in enumerate(trainLoader): + data, target = data[0].to(device), data[1].to(device) + optimizer.zero_grad() + outputs = model(data) + #loss = criterion(outputs, torch.max(target, 1)[1]) + loss = criterion(outputs, target) + running_loss += loss.item() + _, preds = torch.max(outputs.data, 1) + #running_correct += (preds == torch.max(target, 1)[1]).sum().item() + running_correct += (preds == target).sum().item() + loss.backward() + optimizer.step() + if batch_idx % 40 == 0: + print("batch-[{}/{}] Loss: {}".format(batch_idx, len(trainLoader), loss.item())) + + loss = running_loss/len(trainLoader.dataset) + accuracy = 100. * running_correct/len(trainLoader.dataset) + + print(f"Train Loss: {loss:.4f}, Train Acc: {accuracy:.2f}") + return loss, accuracy + +#validation function +def validate(model, dataloader): + print('Validating') + model.eval() + running_loss = 0.0 + running_correct = 0 + with torch.no_grad(): + for i, data in enumerate(dataloader): + data, target = data[0].to(device), data[1].to(device) + outputs = model(data) + #loss = criterion(outputs, torch.max(target, 1)[1]) + loss = criterion(outputs, target) + + running_loss += loss.item() + _, preds = torch.max(outputs.data, 1) + #running_correct += (preds == torch.max(target, 1)[1]).sum().item() + running_correct += (preds == target).sum().item() + + loss = running_loss/len(dataloader.dataset) + accuracy = 100. * running_correct/len(dataloader.dataset) + print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}') + + return loss, accuracy + +def test(model, dataloader): + correct = 0 + total = 0 + with torch.no_grad(): + for data in testLoader: + inputs, target = data[0].to(device), data[1].to(device) + outputs = model(inputs) + _, predicted = torch.max(outputs.data, 1) + total += target.size(0) + correct += (predicted == torch.max(target, 1)[1]).sum().item() + return correct, total + +if __name__ == "__main__": + train_loss, train_accuracy = [], [] + val_loss, val_accuracy = [], [] + print(f"Training on {len(train_data)} examples, validating on {len(val_data)} examples...") + start = time.time() + for epoch in range(epochs): + print(f"Epoch {epoch+1} of {epochs}") + train_epoch_loss, train_epoch_accuracy = train(model, trainLoader) + val_epoch_loss, val_epoch_accuracy = validate(model, valLoader) + train_loss.append(train_epoch_loss) + train_accuracy.append(train_epoch_accuracy) + val_loss.append(val_epoch_loss) + val_accuracy.append(val_epoch_accuracy) + end = time.time() + print((end-start)/60, 'minutes') From 30832acc086b2b55430cfc9871412e64983ddf97 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 16 Jan 2022 22:27:31 +0000 Subject: [PATCH 02/14] change model for transfer learning --- args_helper.py | 6 +++++ .../resnet50/caltech_resnet50_training.yml | 6 +++-- models/resnet.py | 24 +++++++++++++------ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/args_helper.py b/args_helper.py index 8c68edf7..416177a2 100644 --- a/args_helper.py +++ b/args_helper.py @@ -849,6 +849,12 @@ def parse_arguments(self, jupyter_mode=False): default=0, help="Use mixed precision or not" ) + parser.add_argument( + "--transfer_learning", + type=int, + default=0, + help="Use transfer learning or not" + ) if jupyter_mode: diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml index 7977a609..53a115ce 100644 --- a/configs/training/resnet50/caltech_resnet50_training.yml +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -3,7 +3,10 @@ trial_num: 1 # algorithm -algo: 'training' +algo: 'hc_iter' +weight_training: True +transfer_learning: True + # Architecture arch: resnet50 @@ -23,7 +26,6 @@ wd: 0 momentum: 0.9 batch_size: 16 -weight_training: True # ===== Sparsity =========== # conv_type: SubnetConv diff --git a/models/resnet.py b/models/resnet.py index b8e8b4c0..d301cc43 100644 --- a/models/resnet.py +++ b/models/resnet.py @@ -122,6 +122,10 @@ def __init__(self, builder, block, layers, num_classes=1000, base_width=64): self.fc = nn.Conv2d(512 * block.expansion, num_classes, 1) else: self.fc = builder.conv1x1(512 * block.expansion, num_classes) + + if parser_args.transfer_learning: + self.dropout = nn.Dropout2d(0.4) + def _make_layer(self, builder, block, planes, blocks, stride=1): downsample = None @@ -159,7 +163,13 @@ def forward(self, x): x = self.layer3(x) x = self.layer4(x) - x = self.avgpool(x) + if parser_args.transfer_learning: + batch = x.size()[0] + x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = self.dropout(x) + else: + x = self.avgpool(x) + x = self.fc(x) x = x.view(x.size(0), -1) @@ -167,16 +177,16 @@ def forward(self, x): # ResNet }}} -def ResNet18(pretrained=False): - return ResNet(get_builder(), BasicBlock, [2, 2, 2, 2], 1000) +def ResNet18(pretrained=False, num_classes=1000): + return ResNet(get_builder(), BasicBlock, [2, 2, 2, 2], num_classes=num_classes) -def ResNet50(pretrained=False): - return ResNet(get_builder(), Bottleneck, [3, 4, 6, 3], 1000) +def ResNet50(pretrained=False, num_classes=1000): + return ResNet(get_builder(), Bottleneck, [3, 4, 6, 3], num_classes=num_classes) -def ResNet101(pretrained=False): - return ResNet(get_builder(), Bottleneck, [3, 4, 23, 3], 200) +def ResNet101(pretrained=False, num_classes=200): # default: tinyImagenet + return ResNet(get_builder(), Bottleneck, [3, 4, 23, 3], num_classes=num_classes) #return ResNet(get_builder(), Bottleneck, [3, 4, 23, 3], 1000) From f0632d5a71aebc28210ef2b2b73cfa6112b3dd0a Mon Sep 17 00:00:00 2001 From: "J.Sohn" Date: Sun, 16 Jan 2022 17:15:28 -0600 Subject: [PATCH 03/14] caltech data loading and pretrained imagenet model loading --- .../resnet50/caltech_resnet50_training.yml | 2 +- data/__init__.py | 1 + main.py | 11 ++-- main_utils.py | 12 +++-- utils/net_utils.py | 51 +++++++++++++++++++ 5 files changed, 67 insertions(+), 10 deletions(-) diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml index 53a115ce..8e0ddce3 100644 --- a/configs/training/resnet50/caltech_resnet50_training.yml +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -12,7 +12,7 @@ transfer_learning: True arch: resnet50 # ===== Dataset ===== # -dataset: caltech101 +dataset: Caltech101 name: resnet50_caltech101_training # ===== Learning Rate Policy ======== # diff --git a/data/__init__.py b/data/__init__.py index a70f282b..a785522f 100644 --- a/data/__init__.py +++ b/data/__init__.py @@ -3,3 +3,4 @@ from data.tinyimagenet import TinyImageNet from data.mnist import MNIST from data.bigcifar import BigCIFAR10 +from data.caltech101 import Caltech101 \ No newline at end of file diff --git a/main.py b/main.py index f86d9c1f..d5c6fdf5 100644 --- a/main.py +++ b/main.py @@ -43,15 +43,17 @@ def main_worker(gpu, ngpus_per_node): result_root = result_subroot + '/results_' + idty_str + '/' else: result_root = 'results/results_' + idty_str + '/' - + data = get_dataset(parser_args) if not os.path.isdir(result_root): os.mkdir(result_root) - model = get_model(parser_args) + if parser_args.transfer_learning: + model = get_model(parser_args, data.num_classes) + model = test_and_load_pretrained_imagenet(model, data.val_loader) + else: + model = get_model(parser_args) print_model(model, parser_args) - - if parser_args.weight_training: model = round_model(model, round_scheme="all_ones", noise=parser_args.noise, ratio=parser_args.noise_ratio, rank=parser_args.gpu) @@ -66,7 +68,6 @@ def main_worker(gpu, ngpus_per_node): else: model2 = None optimizer = get_optimizer(parser_args, model) - data = get_dataset(parser_args) scheduler = get_scheduler(optimizer, parser_args.lr_policy) #lr_policy = get_policy(parser_args.lr_policy)(optimizer, parser_args) if parser_args.label_smoothing is None: diff --git a/main_utils.py b/main_utils.py index 36438128..ba938a24 100644 --- a/main_utils.py +++ b/main_utils.py @@ -38,6 +38,7 @@ redraw, get_layers, get_prune_rate, + test_and_load_pretrained_imagenet ) from utils.schedulers import get_scheduler from utils.utils import set_seed, plot_histogram_scores @@ -975,17 +976,20 @@ def get_dataset(parser_args): return dataset -def get_model(parser_args): +def get_model(parser_args, num_classes=-1): if parser_args.first_layer_dense: parser_args.first_layer_type = "DenseConv" print("=> Creating model '{}'".format(parser_args.arch)) if parser_args.fixed_init: set_seed(parser_args.seed_fixed_init) - if parser_args.arch in ['Conv4', 'Conv4Normal']: - model = models.__dict__[parser_args.arch](width=parser_args.width) + if parser_args.transfer_learning: + model = models.__dict__[parser_args.arch](num_classes=num_classes) else: - model = models.__dict__[parser_args.arch]() + if parser_args.arch in ['Conv4', 'Conv4Normal']: + model = models.__dict__[parser_args.arch](width=parser_args.width) + else: + model = models.__dict__[parser_args.arch]() if parser_args.fixed_init: set_seed(parser_args.seed) diff --git a/utils/net_utils.py b/utils/net_utils.py index 6d1d7f63..6bbbb9ad 100644 --- a/utils/net_utils.py +++ b/utils/net_utils.py @@ -702,3 +702,54 @@ def zero_one_loss(output, target): pred = pred.t() zero_one_loss_instance = ~pred.eq(target.view(1, -1).expand_as(pred)) return torch.mean(zero_one_loss_instance.to(torch.float32)) + + +def test_and_load_pretrained_imagenet(model, dataloader): + + # check initial model + print('our initial model on transfer task') + val_loss, val_accuracy = validate(model, dataloader) + + # load pytorch pretrained model (imagenet) + imagenet_model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') + print('pretrained model on transfer task') + val_loss, val_accuracy = validate(imagenet_model, dataloader) + + # copy weights from imagenet_model to model + import pdb; pdb.set_trace() + + + # check updated model + print('our updated model on transfer task') + val_loss, val_accuracy = validate(model, dataloader) + + return model + + +#validation function +def validate(model, dataloader): + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # GPU + criterion = nn.CrossEntropyLoss() + + print('Validating') + model.eval() + running_loss = 0.0 + running_correct = 0 + with torch.no_grad(): + for i, data in enumerate(dataloader): + data, target = data[0].to(device), data[1].to(device) + outputs = model(data) + #loss = criterion(outputs, torch.max(target, 1)[1]) + loss = criterion(outputs, target) + + running_loss += loss.item() + _, preds = torch.max(outputs.data, 1) + #running_correct += (preds == torch.max(target, 1)[1]).sum().item() + running_correct += (preds == target).sum().item() + + loss = running_loss/len(dataloader.dataset) + accuracy = 100. * running_correct/len(dataloader.dataset) + print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}') + + return loss, accuracy + From 62d993e2f799dc4f5b2a2c6bef79e3722b7ce5a3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 17 Jan 2022 04:01:29 +0000 Subject: [PATCH 04/14] network name change --- configs/training/resnet50/caltech_resnet50_training.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml index 53a115ce..8c7186dc 100644 --- a/configs/training/resnet50/caltech_resnet50_training.yml +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -9,7 +9,7 @@ transfer_learning: True # Architecture -arch: resnet50 +arch: ResNet50 # ===== Dataset ===== # dataset: caltech101 From 008dad0e6a3e937a56a63423aef8d5421b395c60 Mon Sep 17 00:00:00 2001 From: "J.Sohn" Date: Sun, 16 Jan 2022 22:03:38 -0600 Subject: [PATCH 05/14] caltech101 dataset loading --- data/caltech101.py | 105 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 data/caltech101.py diff --git a/data/caltech101.py b/data/caltech101.py new file mode 100644 index 00000000..59e4708d --- /dev/null +++ b/data/caltech101.py @@ -0,0 +1,105 @@ + +import joblib +import cv2 +import os +import time +import random +import numpy as np + +from imutils import paths +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split + +# Load torch...!!! +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader + +# Load torchvision ...!!! +from torchvision import transforms + + +from args_helper import parser_args + + +class Caltech101: + def __init__(self, args): + super(Caltech101, self).__init__() + + + image_paths = list(paths.list_images('./101_ObjectCategories')) + + data = [] + labels = [] + for img_path in image_paths: + label = img_path.split(os.path.sep)[-2] + if label == "BACKGROUND_Google": + continue + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + data.append(img) + labels.append(label) + + data = np.array(data) + labels = np.array(labels) + + + lb = LabelEncoder() + labels = lb.fit_transform(labels) + print(f"Total Number of Classes: {len(lb.classes_)}") + + train_transforms = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), + ]) + + val_transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]), + ]) + + # divide the data into train, validation, and test set + (X, x_val , Y, y_val) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) + (x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.25, random_state=42) + print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}") + + + train_data = CustomDataset(x_train, y_train, train_transforms) + val_data = CustomDataset(x_val, y_val, val_transform) + test_data = CustomDataset(x_test, y_test, val_transform) + + self.train_loader = DataLoader(train_data, batch_size=BS, shuffle=True, num_workers=4) + self.test_loader = DataLoader(val_data, batch_size=BS, shuffle=True, num_workers=4) + self.num_classes = len(lb.classes_) + + #valLoader = DataLoader(val_data, batch_size=BS, shuffle=True, num_workers=4) + #testLoader = DataLoader(test_data, batch_size=BS, shuffle=True, num_workers=4) + + +# custom dataset class +class CustomDataset(Dataset): + def __init__(self, images, labels= None, transforms = None): + self.labels = labels + self.images = images + self.transforms = transforms + + def __len__(self): + return len(self.images) + + def __getitem__(self, index): + data = self.images[index][:] + + if self.transforms: + data = self.transforms(data) + + if self.labels is not None: + return (data, self.labels[index]) + else: + return data From cde2885690d25275e6ba26db548dee4873ad058a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 17 Jan 2022 08:17:00 +0000 Subject: [PATCH 06/14] caltech101, resnet50, weight training in our repo --- args_helper.py | 6 + .../resnet50/caltech_resnet50_training.yml | 16 ++- data/caltech101.py | 10 +- main.py | 2 +- main_utils.py | 2 +- models/resnet.py | 18 ++- .../BACKGROUND_Google/tmp | Bin 0 -> 5642 bytes utils/conv_type.py | 3 + utils/net_utils.py | 127 +++++++++++++++++- 9 files changed, 165 insertions(+), 19 deletions(-) create mode 100644 test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp diff --git a/args_helper.py b/args_helper.py index 416177a2..c46b2ae2 100644 --- a/args_helper.py +++ b/args_helper.py @@ -299,6 +299,12 @@ def parse_arguments(self, jupyter_mode=False): default=False, help="Boolean flag to indicate whether to use bias" ) + parser.add_argument( + "--bias_lastlayer", + action="store_true", + default=False, + help="Boolean flag to indicate whether to use bias at the last layer" + ) parser.add_argument( "--freeze-weights", action="store_true", diff --git a/configs/training/resnet50/caltech_resnet50_training.yml b/configs/training/resnet50/caltech_resnet50_training.yml index 3de29901..b12a08f6 100644 --- a/configs/training/resnet50/caltech_resnet50_training.yml +++ b/configs/training/resnet50/caltech_resnet50_training.yml @@ -1,6 +1,7 @@ subfolder: resnet50_caltech_weight_training trial_num: 1 - +score_init: 'all_one' +bias_lastlayer: True # algorithm algo: 'hc_iter' @@ -21,7 +22,7 @@ lr: 0.0001 lr_policy: constant_lr #multistep_lr #cosine_lr #constant_lr # ===== Network training config ===== # -epochs: 5 +epochs: 5 #5 wd: 0 momentum: 0.9 batch_size: 16 @@ -29,7 +30,7 @@ batch_size: 16 # ===== Sparsity =========== # conv_type: SubnetConv -bn_type: NonAffineBatchNorm +bn_type: AffineBatchNorm #NonAffineBatchNorm freeze_weights: True prune_rate: -1 init: kaiming_normal @@ -39,3 +40,12 @@ scale_fan: True workers: 4 gpu: 0 + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/data/caltech101.py b/data/caltech101.py index 59e4708d..a8b73203 100644 --- a/data/caltech101.py +++ b/data/caltech101.py @@ -29,7 +29,7 @@ def __init__(self, args): super(Caltech101, self).__init__() - image_paths = list(paths.list_images('./101_ObjectCategories')) + image_paths = list(paths.list_images('test_caltech101/101_ObjectCategories')) data = [] labels = [] @@ -75,12 +75,12 @@ def __init__(self, args): val_data = CustomDataset(x_val, y_val, val_transform) test_data = CustomDataset(x_test, y_test, val_transform) - self.train_loader = DataLoader(train_data, batch_size=BS, shuffle=True, num_workers=4) - self.test_loader = DataLoader(val_data, batch_size=BS, shuffle=True, num_workers=4) + self.train_loader = DataLoader(train_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + self.val_loader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) self.num_classes = len(lb.classes_) - #valLoader = DataLoader(val_data, batch_size=BS, shuffle=True, num_workers=4) - #testLoader = DataLoader(test_data, batch_size=BS, shuffle=True, num_workers=4) + #valLoader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + #testLoader = DataLoader(test_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) # custom dataset class diff --git a/main.py b/main.py index d5c6fdf5..907e7fb9 100644 --- a/main.py +++ b/main.py @@ -48,7 +48,7 @@ def main_worker(gpu, ngpus_per_node): os.mkdir(result_root) if parser_args.transfer_learning: model = get_model(parser_args, data.num_classes) - model = test_and_load_pretrained_imagenet(model, data.val_loader) + model = load_pretrained_imagenet(model, data.val_loader) else: model = get_model(parser_args) print_model(model, parser_args) diff --git a/main_utils.py b/main_utils.py index ba938a24..0dcd77bd 100644 --- a/main_utils.py +++ b/main_utils.py @@ -38,7 +38,7 @@ redraw, get_layers, get_prune_rate, - test_and_load_pretrained_imagenet + load_pretrained_imagenet ) from utils.schedulers import get_scheduler from utils.utils import set_seed, plot_histogram_scores diff --git a/models/resnet.py b/models/resnet.py index d301cc43..3ecb4333 100644 --- a/models/resnet.py +++ b/models/resnet.py @@ -1,4 +1,5 @@ import torch.nn as nn +import torch.nn.functional as F from args_helper import parser_args from utils.builder import get_builder @@ -121,7 +122,13 @@ def __init__(self, builder, block, layers, num_classes=1000, base_width=64): if parser_args.last_layer_dense: self.fc = nn.Conv2d(512 * block.expansion, num_classes, 1) else: - self.fc = builder.conv1x1(512 * block.expansion, num_classes) + if parser_args.bias_lastlayer: + tmp = parser_args.bias + parser_args.bias = True + self.fc = builder.conv1x1(512 * block.expansion, num_classes) + parser_args.bias = tmp + else: + self.fc = builder.conv1x1(512 * block.expansion, num_classes) if parser_args.transfer_learning: self.dropout = nn.Dropout2d(0.4) @@ -147,7 +154,7 @@ def _make_layer(self, builder, block, planes, blocks, stride=1): return nn.Sequential(*layers) - def forward(self, x): + def forward(self, x, hidden=False): # update score thresholds for global ep if parser_args.algo in ['global_ep', 'global_ep_iter'] or parser_args.bottom_k_on_forward: prune(self, update_thresholds_only=True) @@ -163,10 +170,13 @@ def forward(self, x): x = self.layer3(x) x = self.layer4(x) + if hidden: + return x + if parser_args.transfer_learning: - batch = x.size()[0] - x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = F.adaptive_avg_pool2d(x, 1).reshape(x.size(0), -1) x = self.dropout(x) + x = x.view(x.size(0), -1, 1, 1) else: x = self.avgpool(x) diff --git a/test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp b/test_caltech101/101_ObjectCategories/BACKGROUND_Google/tmp new file mode 100644 index 0000000000000000000000000000000000000000..96905fe5a19ae4347285ddc2c6e537f4b10d675a GIT binary patch literal 5642 zcmb7{S5VV$@b15%hZ;h!CP2W@LkB@xC<#4)RFU2kkX}TjH|ZUu6QnC$K%^+rMXEGK zdJ~W?io)UdKWFCLoSUmGoX3`hXl2tix`0WFA-7IfVQumS)9 z;mv3NZx9lJ2#5iao1QuZxG6z|BxF<|GQ$5}fCy*-LLy=~ln$j#&t+ia8Aigu%_ElD z(1;LMQN`JMg_rccpE#6clrXaMNh@7J8&3YG0s4PxkN`J56fHnN03swH022}XkHn1z zKuAjjg`e*}OMrrx9&%g~K@e(}RIN)C6V{=}&;T>V4p*-gd zNLnfI0JKQZST;KQvEtKGNPGB~`m)M?KkNrz=CS)`QesIHCQ_A#g^dWygLDVDiw=_l zTwCREt(8o`J@U?YQ1<2fQ)@3X9>m|gr{SzBsxnlhUHcIcjMbN>$JVaG{ut;>ZJcvb zWTv=?;H{TJ!d*Ya%mf>7fEw<15&%$gwX6g6o$@cVyM5YNeeW`1sxGIcvIW7 z%OY2V7j3%__CWol__yriZ&6?@QMDrBOb6Y=HGWUf&ScQuf)_pS*(-jo1Lxjc^=PPG z>x>s^{5Tep`pVcZ*o*FK_mZr!zkLir>^b6DC*-7&;K`~=8a{<$RXnLq z0tJ(eX_lw)JZ@0>KEXF@3@`jTuvM8nrLnnTYCTpg9eE(kZtf#t$e~bWJ-ee)+kI>-*7-TrpLF7EDn!=R-W{VMw zr52@8?i2B)a;>E4y?dB!;leq3COtPw$b3}Y0bnHHUDY=Vg@(S?$|WLaCYK*w9T7yk zJjp$3h*2dsaT0%yqu6JIuHq>n=`JqAo6^p8l{Fj4D(&KSIzJDgxn-u$k9-Jvk7n85 zP8V4xCGJ|jvq=tUXU=O<87&7pWPP0)w}A;;7}VhsDn=+%|NbqxhzjCsq*9PS%bY?t zTK?|)9^CtIQWl|p*9{$%Kt4<-7G<3{L-%xz# zWO63!jY>peLU)q&qr)hBI4`UcsW3bVoz2~qs?=*#vs9M7ZM4{49Ok*@hsE6>bf#Tf zA1*Wx1MhN^IS98@tT4Vq!`1n8ioSwI2QfL|Sg4>PR8gO?5Tw~vFRxlFSWGk>3iEq> z9;ChH2s<7}_xSt2Kesfi61&n?S(v3(Hr)7~;KJkzL3_sN$*4h_bK}99Q`J=V`$B)LW5n zU{`j}EtV?1oxV?<_~*vzmM%-*Y}qlVB^DDG{53Dy*)ID;9T}{7{8e2#hNIDD=Vg!W zwYGu~Ij2e@u#4*A?jgNScqF^McQI4gp|RE}-x5SYmDW0Yva%(19-o5M5X?<2r5?yS!nz<*+#(Ue5W68>+)26zgUXqz5quVCQYY})C$Fd2A zWB}|+2KblpDn*;=1C^>k)`w;hb)Y0Q=GbL-8J+=e>h56c3`?7k>m6exS#DU}ExE$} z`j<{veznhs7;WBa)3Ee;@zj7!q|tPH*{({Ne$tl!l$GT@!&J@yN&UXTwdvTS(Dis* z{;02$V7O#8PoFte&}O+3Hl`aISRv8@a@IUj5YDF)lqCSw>Kgf6TIk@xnluzIecDihQ0508b+R5cG}4`XWygOk$~G#uUDe|cku)RIl&L~Us~zYk9$QE zJN`vH(rT{WzNMfDE{~l=W4AB$?%}YJsKJXdTwG_k;`^cyPGx>g9#o`hz@DqVyVjqT zV~IbqKSM2=ftuN|`~FUzxwFkJjrgCN?mr>DcVAUn9j31irwMbyLWTU=tM?Wn(00LvZ zjRH*iymW%zMd2?FLkE3oZkw5nm0HR(mTOXDX9V~Jm_>QOYM9(spNj1o8Wy|x0{%+{ z?j>qyRW`ID1m~Yr!ojh;I@$h)E5;q?e@?c&)4a6dXn3ja`gviGsUD`|{_ym@3zLqW zv;7SiBIXq(rB3bdM!4Pr~?0e_hEe zur*%W$)1ZZ=Bq_n3Cz}{~&y`tn~M@t#|=GIL4^BRoXxW8lT(@z%w(Xtec3y|U+i#72UY5%7^(L|&D3i}@@xi)5{c>_T(j8{0 zPOWy5Y!BE4(v$gJVu>~KD3(*1G~0i$sbS{3WvG)B#|Gt)UxWp!?uDm?MyBq7W+??G z6jA~)8+FG_QYtf5Q^UL`(!ot_AI@%r-{^q^Co`6_?<6yuPYH=TZ_51s-R^fcH+r`} zk31A(l4cdMlVm<7EPE2YWL^xq`d9A!IZtzhD$Sp>gp`~}DyUc@PLDhFHHH@#a7K0; z<{`5}+EDXom8;5{o4b}_=woX|nQgaE-()2#hNf-ghBNv$dhIteD5C$MDVK}Gr&ol0 zy4%Y*HT}jUc`4|UGred*hTOzRPJTOj&8QC3VxXunA-j zaoBCBt<74MIq>>K4;`|qEB)hBFcB;$6Vbhqc@?+^Ba!f=z+5 zq;fBfyE=pTOI-cni-@=D17>N#2I|UGjoSB`m|RgifwFCiez(dYU;R$L@OiiC+J~#{ zK|by@U0pmi*YfdIK>}7vjwcUhX?DPrqK8zfR3BIdBP%&NPjhEW4IfrgvguJz(6B^Z z1G}_cjx+k8-z;abVp$L|T`l*RYW#g%nrxhzOI0jsY;PW0B)7kTR9<24PcoD(A?coI z%LL4vUr+y6WWY{Mo%eUe+}$(kbC3jY9Z+DVKtksN$FhzqFM&T-bDCoFkuMq<>3eD` zhqDX%-uBDN$bae}TNGOUQeeWtnT!;QUgj=cqD~YS=GN59&!<|9^zmi&@M>^6Pnaoa zo_lR?Zs+aq@iQ__IH-RD=c6dwA#)Af;Z}kFkCiwQXAr8P<^ub^2Yg_t7UM0qUbY#bon?MB3fMtIu5T$)8UDVLW$ithwSu-toU=k zq9wL6kOYodcAjqzRxu&h&0NLI6Cs@4n8F)8ugtjne)%`?!6h%1W`V?{CgJK-r!QIF zYPx%k&56M0Z>Z@hDAFE}=*;u}3Y&SbJ^ojl_)Kz-CWGHo7O_~h^6<_@UTXJQf~pIS zJeo{Fh}^6jZ?Lub0o_v4ui^18f8K&xs= zMSY^3&F0!SQ8;2lkTXAHJTJ8rGX4uq)b>cj{HK4YFGG1k5=xbT17E9<5$6x}c@oR- zQbdYBTr|YOW=P zw#^$GRbGP1>*1E^%3_?O(ar5P=~Lw29LxQJO34Hhd(L7V9dNiZygcVSRfOrqH9-4B zqAT@}PrgHkg=qOVIU(^6Qly=5yfIJGF8%0U=x$#I}w$&!;Pkf-~qj7*0AtNQMp0WOV*Ykv8V*N^pT2c>K{eZ>5=+-}J;rRZu0jm{&nm+AyrL=_=?!gZ zpxZnk3NGuHsF-G0+^Q5(k_{lLXnr=Wrl3qez1$d_r~ZV9 z#qh&&am)~euvZ>?doi4d@>GX$;hjBg!;L9LOlk3EVZIluCL`3{1RgiqfCT_W^~zTi9gq2%O)|k9$yfx7dxzb@kfm_ zgT z+gql2v@!NNezzQ1;cxO!w@#RwO_8J!$>!=@f4Curz2M}$*fvb~J~|nHS@ue@Qf-Zt zpy16@XQl*)eD8c8jnF1S+C1X7GLdy9DsC+T7=8PnJA;Y7`uHNven~Z-_GShog_Fkc zK3my1#QR+-c@quofMTD9v0ICxCD5K;vKxW}^F+)ftLWgyh8tjP1)F+S_NKKh zQO%4J`zGRK=HmUwhcAjBV$HadO@fW~^;g6V3Sa+;A%6H(C#uRXA+A{D1=clE*lY`{ z-@B{ex#Rq@)PX%TlW;wKz>g@PWvnoljvy;~zS7%ZW}Y)V{uf+gGF7xS*M)lVXddY3 z1dO!z9WxpZ1sBu{RO432&6=F?{X4cu-hFgSqIi-XeOCP3X8FB%efn%@{~E~av7SQ5 zRg=ifo^mvE9zQjE47)WZTfh{S67u}xOFRz>64!>!4=zM)rFx~sn~}{|A;fwR1uK8<;3G`5?}yWh-f7h> zg#4IZPCxlOu#|nueT(dyeLM0k1P9PxCWRPL#=)dUgCd<~zitR`-UI2+y^)|PC%@>~ z9VXW>pmiu8q>V?qbs_6a_)Xx2xDF*g8|KJ6Fu|441S)|E;5#JUoV~3^7%SFCYqrS?l z)vst{I%fGTF0SeCMbM+;yp!ipz$<_-=T(Z~n3o03d%=5AP`nWN4U`hAiWu%WrgzJpdMnrsiVLsXRZgba$?dyG(7NFW_x`aOq z2c;P03lHdBizPx)Q=$aq%Y`_C3{aSog>rAk`sPvKHdM10mNdwq^KwI$4&5PHT*pIV zRPXPzn{1U4iATVQ5&&tk9E<6lG0$Hy_toM>DGy^0-Pa*09f9#MK)ov98p!Z@fy}8d z)WTIDV8kO!(nrbuU}KK7<`EFJq?{_#G^fb!_7fEjw`<`1QcvJcZ`E$5C|7iu0S2_G zwKTWJq!BR{4q5SV;Ixsj?gU8}e4D|-?q`z?BL5BC!F5o*oyPO-?HgGB+?vfl!DKPs75fb zvXcG{IVK(>0TmLY_wKfgV43t3uvw|K#3hX~ve6UEXGR^lTJ+-6k>$!tquC#Gn`Ssa ziH(P2AsOlKBIM1vgyBMU@G18)4sboGulm z{}Su@3_a_7a(nw%&Fnv3H_(+Ekf`z+ z;G8?JR$*L7qo1diMwcK}RP<9a5_?DpxMymP|KyUI1TqWBpT@fmA;CwFHb8A|h&Qbw z5|(7sZYQ{mnA44G;Ee5P0G>d(EwR4kF?|h)T4fFmiPQAr>nLi)3j~V9ALY6VtC;1d jCb5;3xj@TDgyUQ=j*r 0: # conv_layers.append(layer[basic_block_id].shortcut[0]) + linear_layers = [model.fc] elif arch == 'vgg16': conv_layers = [] @@ -102,6 +105,26 @@ def get_layers(arch='Conv4', model=None): return (conv_layers, linear_layers) + +def get_bn_layers(arch='ResNet50', model=None): + if isinstance(model, nn.parallel.DistributedDataParallel): + model = model.module + + if arch == 'ResNet50': + bn_layers = [model.bn1] + for layer in [model.layer1, model.layer2, model.layer3, model.layer4]: + for basic_block_id in [i for i in range(len(layer))]: + bn_layers.append(layer[basic_block_id].bn1) + bn_layers.append(layer[basic_block_id].bn2) + bn_layers.append(layer[basic_block_id].bn3) + + return bn_layers + + + + + + def redraw(model, shuffle=False, reinit=False, invert=False, chg_mask=False, chg_weight=False): cp_model = copy.deepcopy(model) conv_layers, linear_layers = get_layers(parser_args.arch, cp_model) @@ -704,32 +727,126 @@ def zero_one_loss(output, target): return torch.mean(zero_one_loss_instance.to(torch.float32)) +def load_pretrained_imagenet(model, dataloader): + + + pretrained = imagenet_ResNet50(pretrained=True).cuda() + model_s = pretrained.model # source model + #model_s = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') # source model + #model_s = model_s.cuda() + + #for param_tensor in model_s.state_dict(): + # print(param_tensor, "\t", model_s.state_dict()[param_tensor].size()) + PATH = 'pretrained_model_imagenet.pth' + torch.save(model_s.state_dict(), PATH) + model.load_state_dict(torch.load(PATH), strict=False) + + # test the consistency of model and pretrained model + model = model.cuda() + x = torch.rand(16,3,224,224).cuda() # random dataset + z1 = model.forward(x, hidden=True) + z2 = pretrained(x, hidden=True) + #z2 = model_s.features(x) + print('Compare hidden feature: ', (z1 == z2).all()) + + + # load the final layer + num_classes = pretrained.l0.weight.shape[0] + model.fc.weight.data = pretrained.l0.weight.data.view(num_classes, -1, 1, 1) + model.fc.bias.data = pretrained.l0.bias.data + + y1 = model(x) + y2 = pretrained(x) + print('Compare prediction: ', torch.norm(y1 - y2)) + print('Note: this is small only if we turn off dropout at both loaded/our models') + #pdb.set_trace() + + return model + + +class imagenet_ResNet50(nn.Module): + def __init__(self, pretrained): + super(imagenet_ResNet50, self).__init__() + if pretrained is True: + self.model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') + else: + self.model = pretrainedmodels.__dict__['resnet50'](pretrained = None) + # change the classification layer + self.l0= nn.Linear(2048, 101) + self.dropout = nn.Dropout2d(0.4) + + def forward(self, x, hidden=False): + # get the batch size only, ignore(c, h, w) + batch, _, _, _ = x.size() + x = self.model.features(x) + if hidden: + return x + x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1) + x = self.dropout(x) + l0 = self.l0(x) + return l0 + + +''' def test_and_load_pretrained_imagenet(model, dataloader): + model = model.cuda() + x = torch.rand(16,3,224,224).cuda() # random dataset + z1 = model.forward(x, hidden=True) + + # load pytorch pretrained model (imagenet) + # imagenet_model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') + # imagenet_model = imagenet_model.cuda() + imagenet_model = imagenet_ResNet50(pretrained=True).cuda() + #z2 = imagenet_model.forward(x, hidden=True) # check initial model print('our initial model on transfer task') val_loss, val_accuracy = validate(model, dataloader) - # load pytorch pretrained model (imagenet) - imagenet_model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet') print('pretrained model on transfer task') val_loss, val_accuracy = validate(imagenet_model, dataloader) # copy weights from imagenet_model to model - import pdb; pdb.set_trace() - + #import pdb; pdb.set_trace() + conv, lin = get_layers('ResNet50', model) + layers = [*conv, *lin] + conv2, lin2 = get_layers('ResNet50', imagenet_model.model) + layers2 = [*conv2, *lin2] + + for target_layer, source_layer in zip(layers, layers2): + if source_layer is None: + continue + #print(target_layer, source_layer) + assert(target_layer.weight.data.shape == source_layer.weight.data.shape) + target_layer.weight.data = source_layer.weight.data + + bn = get_bn_layers('ResNet50', model) + bn2 = get_bn_layers('ResNet50', imagenet_model.model) + + for target_layer, source_layer in zip(bn, bn2): + assert(target_layer.weight.data.shape == source_layer.weight.data.shape) + target_layer.weight.data = source_layer.weight.data + target_layer.bias.data = source_layer.bias.data + z3 = model.forward(x, hidden=True) # check updated model print('our updated model on transfer task') val_loss, val_accuracy = validate(model, dataloader) + print((z1 == z2).all()) + print((z3 == z2).all()) + print((z3 == z2).all()) + import pdb; pdb.set_trace() + return model + #validation function def validate(model, dataloader): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # GPU criterion = nn.CrossEntropyLoss() + model = model.to(device) print('Validating') model.eval() @@ -752,4 +869,4 @@ def validate(model, dataloader): print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}') return loss, accuracy - +''' From e7db14b9e83ecec3b18e64b9c4f3abfe065005fd Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 19 Jan 2022 20:36:43 +0000 Subject: [PATCH 07/14] transfer learning for weight training (1FC, 2FC versions) --- args_helper.py | 10 +++- caltech_exec_GD.sh | 21 ++++++-- .../caltech_resnet50_training_1FC.yml | 51 ++++++++++++++++++ .../caltech_resnet50_training_2FC.yml | 52 +++++++++++++++++++ main_utils.py | 5 +- models/resnet.py | 29 +++++++++-- utils/net_utils.py | 14 ++--- utils/schedulers.py | 4 ++ 8 files changed, 168 insertions(+), 18 deletions(-) create mode 100644 configs/training/resnet50/caltech_resnet50_training_1FC.yml create mode 100644 configs/training/resnet50/caltech_resnet50_training_2FC.yml diff --git a/args_helper.py b/args_helper.py index c46b2ae2..c64463dd 100644 --- a/args_helper.py +++ b/args_helper.py @@ -300,10 +300,10 @@ def parse_arguments(self, jupyter_mode=False): help="Boolean flag to indicate whether to use bias" ) parser.add_argument( - "--bias_lastlayer", + "--bias_fc", #_lastlayer", action="store_true", default=False, - help="Boolean flag to indicate whether to use bias at the last layer" + help="Boolean flag to indicate whether to use bias at the last fc layers" ) parser.add_argument( "--freeze-weights", @@ -861,6 +861,12 @@ def parse_arguments(self, jupyter_mode=False): default=0, help="Use transfer learning or not" ) + parser.add_argument( + "--uv_decomp", + type=int, + default=0, + help="Use W=UV decomposition or not" + ) if jupyter_mode: diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh index b123e9c4..1a048941 100755 --- a/caltech_exec_GD.sh +++ b/caltech_exec_GD.sh @@ -1,12 +1,25 @@ # ResNet50_tf (changed the output size) # Weight training -python main.py --config configs/training/resnet50/caltech_resnet50_training.yml +#python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 +python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 # HC -#python main.py --config configs/hypercube/tinyImageNet/resnet18/resnet18_sparsity_1_4_adam_9lam6.yml > log_tiny_hc_sparsity_1_4_adam_9lam6 2>&1 +#:< log_caltech_hc_sparsity_50_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_unconstrained.yml > log_caltech_hc_sparsity_unconstrained_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5.yml > log_caltech_hc_sparsity_5_2lam6_UV 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2.yml > log_caltech_hc_sparsity_2_5lam6_UV 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5.yml > log_caltech_hc_sparsity_0_5_1_5lam5_UV 2>&1 +#BLOCK +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5.yml > log_caltech_hc_sparsity_0_5_2lam5_50epoch_mutli 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_0_5_1lam5.yml > log_caltech_hc_sparsity_0_5_1lam5_50epoch_mutli 2>&1 # EP -#python main.py --config configs/ep/tinyImageNet/resnet18/sparsity_0_75.yml > log_tiny_ep_sparsity_0_75 2>&1 - +#:< log_caltech_ep_sparsity_50_UV_1000_bias_real_final 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_5.yml > log_caltech_ep_sparsity_5_UV 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_2.yml > log_caltech_ep_sparsity_2_UV 2>&1 +#python main.py --config configs/ep/resnet50/caltech101/caltech101_resnet50_ep_sparsity_0_5.yml > log_caltech_ep_sparsity_0_5_UV 2>&1 +#BLOCK diff --git a/configs/training/resnet50/caltech_resnet50_training_1FC.yml b/configs/training/resnet50/caltech_resnet50_training_1FC.yml new file mode 100644 index 00000000..a98e8b9b --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training_1FC.yml @@ -0,0 +1,51 @@ +subfolder: resnet50_caltech_weight_training_1FC +trial_num: 1 +score_init: 'all_one' +bias_fc: True + +# algorithm +algo: 'hc_iter' +weight_training: True +transfer_learning: True + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/configs/training/resnet50/caltech_resnet50_training_2FC.yml b/configs/training/resnet50/caltech_resnet50_training_2FC.yml new file mode 100644 index 00000000..9bc09dac --- /dev/null +++ b/configs/training/resnet50/caltech_resnet50_training_2FC.yml @@ -0,0 +1,52 @@ +subfolder: resnet50_caltech_weight_training_2FC +trial_num: 1 +score_init: 'all_one' +bias_fc: True +uv_decomp: True + +# algorithm +algo: 'hc_iter' +weight_training: True +transfer_learning: True + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_training + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.0001 +lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_rate: -1 +init: kaiming_normal +scale_fan: True + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_fine_tune: True +skip_sanity_checks: True + + diff --git a/main_utils.py b/main_utils.py index 0dcd77bd..b5929351 100644 --- a/main_utils.py +++ b/main_utils.py @@ -392,6 +392,7 @@ def finetune(model, parser_args, data, criterion, old_epoch_list, old_test_acc_b def get_idty_str(parser_args): train_mode_str = 'weight_training' if parser_args.weight_training else 'pruning' + epoch_str = parser_args.epochs dataset_str = parser_args.dataset model_str = parser_args.arch algo_str = parser_args.algo @@ -413,8 +414,8 @@ def get_idty_str(parser_args): run_idx_str = parser_args.run_idx lam_ft_str = parser_args.lam_finetune_loss n_step_ft_str = parser_args.num_step_finetune - idty_str = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_finetune_{}_MAML_{}_{}_fan_{}_{}_{}_width_{}_seed_{}_idx_{}".\ - format(train_mode_str, dataset_str, model_str, algo_str, rate_str, period_str, reg_str, reg_lmbda, + idty_str = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_finetune_{}_MAML_{}_{}_fan_{}_{}_{}_width_{}_seed_{}_idx_{}".\ + format(train_mode_str, epoch_str, dataset_str, model_str, algo_str, rate_str, period_str, reg_str, reg_lmbda, opt_str, policy_str, lr_str, lr_gamma, lr_adj, finetune_lr_str, lam_ft_str, n_step_ft_str, fan_str, w_str, s_str, width_str, seed_str, run_idx_str).replace(".", "_") diff --git a/models/resnet.py b/models/resnet.py index 3ecb4333..2956a556 100644 --- a/models/resnet.py +++ b/models/resnet.py @@ -1,5 +1,6 @@ import torch.nn as nn import torch.nn.functional as F +import pdb from args_helper import parser_args from utils.builder import get_builder @@ -122,16 +123,33 @@ def __init__(self, builder, block, layers, num_classes=1000, base_width=64): if parser_args.last_layer_dense: self.fc = nn.Conv2d(512 * block.expansion, num_classes, 1) else: - if parser_args.bias_lastlayer: + if parser_args.transfer_learning: + if parser_args.uv_decomp: + dim_size = 1000 + else: + dim_size = num_classes + else: + dim_size = num_classes + + if parser_args.bias_fc: tmp = parser_args.bias parser_args.bias = True - self.fc = builder.conv1x1(512 * block.expansion, num_classes) + self.fc = builder.conv1x1(512 * block.expansion, dim_size) parser_args.bias = tmp else: - self.fc = builder.conv1x1(512 * block.expansion, num_classes) + self.fc = builder.conv1x1(512 * block.expansion, dim_size) if parser_args.transfer_learning: self.dropout = nn.Dropout2d(0.4) + if parser_args.uv_decomp: + if parser_args.bias_fc: + tmp = parser_args.bias + parser_args.bias = True + self.fc2 = builder.conv1x1(dim_size, num_classes) + parser_args.bias = tmp + else: + self.fc2 = builder.conv1x1(dim_size, num_classes) + def _make_layer(self, builder, block, planes, blocks, stride=1): @@ -179,8 +197,11 @@ def forward(self, x, hidden=False): x = x.view(x.size(0), -1, 1, 1) else: x = self.avgpool(x) - + x = self.fc(x) + if parser_args.transfer_learning and parser_args.uv_decomp: + x = self.fc2(x) + x = x.view(x.size(0), -1) return x diff --git a/utils/net_utils.py b/utils/net_utils.py index f7b0dd79..a1243069 100644 --- a/utils/net_utils.py +++ b/utils/net_utils.py @@ -751,9 +751,9 @@ def load_pretrained_imagenet(model, dataloader): # load the final layer - num_classes = pretrained.l0.weight.shape[0] - model.fc.weight.data = pretrained.l0.weight.data.view(num_classes, -1, 1, 1) - model.fc.bias.data = pretrained.l0.bias.data + #num_classes = pretrained.l0.weight.shape[0] + #model.fc.weight.data = pretrained.l0.weight.data.view(num_classes, -1, 1, 1) + #model.fc.bias.data = pretrained.l0.bias.data y1 = model(x) y2 = pretrained(x) @@ -761,6 +761,9 @@ def load_pretrained_imagenet(model, dataloader): print('Note: this is small only if we turn off dropout at both loaded/our models') #pdb.set_trace() + print('pretrained model on transfer task') + val_loss, val_accuracy = validate(model_s, dataloader) + return model @@ -839,8 +842,7 @@ def test_and_load_pretrained_imagenet(model, dataloader): import pdb; pdb.set_trace() return model - - +''' #validation function def validate(model, dataloader): @@ -869,4 +871,4 @@ def validate(model, dataloader): print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}') return loss, accuracy -''' + diff --git a/utils/schedulers.py b/utils/schedulers.py index 7e98f951..2826a124 100644 --- a/utils/schedulers.py +++ b/utils/schedulers.py @@ -4,6 +4,10 @@ def get_scheduler(optimizer, policy='multistep_lr', milestones=[80, 120], gamma=0.1, max_epochs=150): + if parser_args.epochs == 50: + milestones = [20, 40] + max_epochs = 50 + if parser_args.epochs == 200: milestones = [100, 150] max_epochs = 200 From a503139dc105b858332c700b96ef3d05f8155161 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 19 Jan 2022 22:16:58 +0000 Subject: [PATCH 08/14] bias=False, AffineBatchNorm (to follow original feature extractor) --- configs/training/resnet50/caltech_resnet50_training_1FC.yml | 2 +- configs/training/resnet50/caltech_resnet50_training_2FC.yml | 2 +- main.py | 1 + utils/net_utils.py | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/configs/training/resnet50/caltech_resnet50_training_1FC.yml b/configs/training/resnet50/caltech_resnet50_training_1FC.yml index a98e8b9b..262d2fc3 100644 --- a/configs/training/resnet50/caltech_resnet50_training_1FC.yml +++ b/configs/training/resnet50/caltech_resnet50_training_1FC.yml @@ -1,7 +1,7 @@ subfolder: resnet50_caltech_weight_training_1FC trial_num: 1 score_init: 'all_one' -bias_fc: True +#bias_fc: True # algorithm algo: 'hc_iter' diff --git a/configs/training/resnet50/caltech_resnet50_training_2FC.yml b/configs/training/resnet50/caltech_resnet50_training_2FC.yml index 9bc09dac..f2d99260 100644 --- a/configs/training/resnet50/caltech_resnet50_training_2FC.yml +++ b/configs/training/resnet50/caltech_resnet50_training_2FC.yml @@ -1,7 +1,7 @@ subfolder: resnet50_caltech_weight_training_2FC trial_num: 1 score_init: 'all_one' -bias_fc: True +#bias_fc: True uv_decomp: True # algorithm diff --git a/main.py b/main.py index 907e7fb9..d810f01a 100644 --- a/main.py +++ b/main.py @@ -54,6 +54,7 @@ def main_worker(gpu, ngpus_per_node): print_model(model, parser_args) + #import pdb; pdb.set_trace() if parser_args.weight_training: model = round_model(model, round_scheme="all_ones", noise=parser_args.noise, ratio=parser_args.noise_ratio, rank=parser_args.gpu) diff --git a/utils/net_utils.py b/utils/net_utils.py index a1243069..221ffc0e 100644 --- a/utils/net_utils.py +++ b/utils/net_utils.py @@ -754,7 +754,7 @@ def load_pretrained_imagenet(model, dataloader): #num_classes = pretrained.l0.weight.shape[0] #model.fc.weight.data = pretrained.l0.weight.data.view(num_classes, -1, 1, 1) #model.fc.bias.data = pretrained.l0.bias.data - + ''' y1 = model(x) y2 = pretrained(x) print('Compare prediction: ', torch.norm(y1 - y2)) @@ -763,6 +763,7 @@ def load_pretrained_imagenet(model, dataloader): print('pretrained model on transfer task') val_loss, val_accuracy = validate(model_s, dataloader) + ''' return model From b0b062254812f53a5dd4c56ee8c057bc11b1e540 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 20 Jan 2022 03:20:52 +0000 Subject: [PATCH 09/14] run for updated code (bias=False) --- caltech_exec_GD.sh | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh index 1a048941..40e5bd6b 100755 --- a/caltech_exec_GD.sh +++ b/caltech_exec_GD.sh @@ -2,7 +2,36 @@ # Weight training #python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 -python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 +#python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 + + + +# HC +python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_50_1FC.yml #> log_caltech_hc_sparsity_50_1FC 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_50_2FC.yml #> log_caltech_hc_sparsity_50_2FC 2>&1 + + +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5_1FC.yml #> log_caltech_hc_sparsity_5_1FC 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_5_2FC.yml #> log_caltech_hc_sparsity_5_2FC 2>&1 + + +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2_1FC.yml #> log_caltech_hc_sparsity_2_1FC 2>&1 +#python main.py --config configs/hypercube/resnet50/caltech101/caltech101_resnet50_hc_sparsity_2_2FC.yml #> log_caltech_hc_sparsity_2_2FC 2>&1 + + + + + + + + +# ======== OLD + + + + + + # HC #:< Date: Thu, 20 Jan 2022 07:08:19 +0000 Subject: [PATCH 10/14] computed sparsity for additional FC layer in resnet50 --- utils/net_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/net_utils.py b/utils/net_utils.py index 221ffc0e..358377d8 100644 --- a/utils/net_utils.py +++ b/utils/net_utils.py @@ -82,6 +82,9 @@ def get_layers(arch='Conv4', model=None): # if len(layer[basic_block_id].shortcut) > 0: # conv_layers.append(layer[basic_block_id].shortcut[0]) linear_layers = [model.fc] + if parser_args.uv_decomp: + linear_layers.append(model.fc2) + elif arch == 'vgg16': conv_layers = [] From 75904441ef5c55c82a080745dacc1dff9c0339f8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 22 Jan 2022 22:08:43 +0000 Subject: [PATCH 11/14] sanity check used for trial=1 --- configs/sanity/caltech_sanity.yml | 72 +++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 configs/sanity/caltech_sanity.yml diff --git a/configs/sanity/caltech_sanity.yml b/configs/sanity/caltech_sanity.yml new file mode 100644 index 00000000..28fcf402 --- /dev/null +++ b/configs/sanity/caltech_sanity.yml @@ -0,0 +1,72 @@ +subfolder: resnet50_caltech_1FC_sanity + +# algorithm +algo: 'hc_iter' +iter_period: 5 + + +# Architecture +arch: ResNet50 + +# ===== Dataset ===== # +dataset: Caltech101 +name: resnet50_caltech101_HC +transfer_learning: True + +# ===== Learning Rate Policy ======== # +optimizer: adam +lr: 0.001 +lr_policy: cosine_lr #cosine_lr #constant_lr +fine_tune_optimizer: adam +fine_tune_lr: 0.0001 +fine_tune_lr_policy: multistep_lr #cosine_lr #constant_lr + +# ===== Network training config ===== # +epochs: 50 #5 +wd: 0 +momentum: 0.9 +batch_size: 16 + + +# ===== Sparsity =========== # +conv_type: SubnetConv +bn_type: AffineBatchNorm #NonAffineBatchNorm +freeze_weights: True +prune_type: BottomK +#target_sparsity: 2 +# decide if you want to "unflag" +unflag_before_finetune: True +init: signed_constant +score_init: unif #skew #half #bimodal #skew # bern +scale_fan: False + +# ===== Rounding ===== # +round: naive +noise: True +noise_ratio: 0 + +# ===== Quantization ===== # +hc_quantized: True +quantize_threshold: 0.5 + +# ===== Regularization ===== # +regularization: L2 +#lmbda: 0.000005 + + +# ===== Hardware setup ===== # +workers: 4 +gpu: 0 + + +# ===== Checkpointing ===== # +checkpoint_at_prune: False + +# ==== sanity check ==== # +skip_sanity_checks: True +only_sanity: True +sanity_folder: results/resnet50_caltech_HC_sparsity_50_1FC/ + +#results/resnet50_caltech_HC_sparsity_5_1FC/ +#results/resnet50_caltech_HC_sparsity_2_1FC/ # AWS-4 + From f454a56034c3a8c3819d6f7705902718e634f4f1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 9 Apr 2022 05:09:41 +0000 Subject: [PATCH 12/14] use test set instead of validation set --- data/caltech101.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/data/caltech101.py b/data/caltech101.py index a8b73203..c66e7bfa 100644 --- a/data/caltech101.py +++ b/data/caltech101.py @@ -66,8 +66,10 @@ def __init__(self, args): ]) # divide the data into train, validation, and test set - (X, x_val , Y, y_val) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) - (x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.25, random_state=42) + (X, x_test , Y, y_test) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) + (x_train, x_val, y_train, y_val) = train_test_split(X, Y, test_size=0.25, random_state=42) + #(X, x_val , Y, y_val) = train_test_split(data, labels, test_size=0.2, stratify=labels,random_state=42) + #(x_train, x_test, y_train, y_test) = train_test_split(X, Y, test_size=0.25, random_state=42) print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}") @@ -76,7 +78,8 @@ def __init__(self, args): test_data = CustomDataset(x_test, y_test, val_transform) self.train_loader = DataLoader(train_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) - self.val_loader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + self.val_loader = DataLoader(test_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) + #self.val_loader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) self.num_classes = len(lb.classes_) #valLoader = DataLoader(val_data, batch_size=parser_args.batch_size, shuffle=True, num_workers=4) From 803237425ab9ff9a3e2a169198b540d43fde6942 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 23 Apr 2022 01:35:35 +0000 Subject: [PATCH 13/14] merge bash file --- caltech_exec_GD.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh index 1a048941..a558b85f 100755 --- a/caltech_exec_GD.sh +++ b/caltech_exec_GD.sh @@ -2,7 +2,7 @@ # Weight training #python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 -python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 +#python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1 # HC #:< Date: Sat, 23 Apr 2022 01:50:28 +0000 Subject: [PATCH 14/14] merge --- caltech_exec_GD.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caltech_exec_GD.sh b/caltech_exec_GD.sh index e93ca834..9db28e48 100755 --- a/caltech_exec_GD.sh +++ b/caltech_exec_GD.sh @@ -1,7 +1,7 @@ # ResNet50_tf (changed the output size) # Weight training -#python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 +python main.py --config configs/training/resnet50/caltech_resnet50_training_1FC.yml #> log_caltech_wt_50epoch_1FC 2>&1 #python main.py --config configs/training/resnet50/caltech_resnet50_training_2FC.yml #> log_caltech_wt_50epoch_2FC 2>&1