From dc167e57b0d8264ee2dfe58a9e94ec7df7f3d220 Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 11:09:19 -0800 Subject: [PATCH 01/13] bugfix continue_ti inversion --- lora_diffusion/cli_lora_pti.py | 36 +++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 20d30dd..9ba1e76 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -67,6 +67,29 @@ def preview_training_batch(train_dataloader, mode, n_imgs = 40): print(f"\nSaved {imgs_saved} preview training imgs to {outdir}") return +def sim_matrix(a, b, eps=1e-8): + """ + added eps for numerical stability + """ + b_n = b.norm(dim=1)[:, None] + a_norm = a / torch.max(b_n, eps * torch.ones_like(b_n)) + b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n)) + sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1)) + return sim_mt + +def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder): + # get all the token embeddings: + token_embeds = text_encoder.get_input_embeddings().weight.data + + # Compute the cosine-similarity between the optimized tokens and all the other tokens + similarity = sim_matrix(optimized_tokens, token_embeds).squeeze() + similarity = similarity.cpu().numpy() + + # print similarity for the most similar tokens: + most_similar_tokens = np.argsort(similarity)[::-1] + for token_id in most_similar_tokens[:5]: + print(f"{tokenizer.decode(token_id)}: {similarity[token_id]:.4f}") + def get_models( pretrained_model_name_or_path, @@ -517,8 +540,11 @@ def train_inversion( index_no_updates ] = orig_embeds_params[index_no_updates] - for i, t in enumerate(optimizing_embeds): - print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}") + if global_step % 10 == 0: + print("----------------------") + for i, t in enumerate(optimizing_embeds): + print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}") + print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder) global_step += 1 progress_bar.update(1) @@ -654,7 +680,7 @@ def perform_tuning( vae, text_encoder, scheduler, - optimized_embeddings = text_encoder.get_input_embeddings().weight[:, :], + optimized_embeddings = text_encoder.get_input_embeddings().weight[~index_no_updates, :], train_inpainting=train_inpainting, t_mutliplier=0.8, mixed_precision=True, @@ -825,6 +851,10 @@ def train( script_start_time = time.time() torch.manual_seed(seed) + if use_template == "person" and not use_face_segmentation_condition: + print("### WARNING ### : Using person template without face segmentation condition") + print("When training people, it is highly recommended to use face segmentation condition!!") + # Get a dict with all the arguments: args_dict = locals() From aab969c45fa45c42eac8ed22b937c0e301e15db4 Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 11:16:27 -0800 Subject: [PATCH 02/13] print most similar tokens during ti phase --- lora_diffusion/cli_lora_pti.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 9ba1e76..bc402fa 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -71,8 +71,8 @@ def sim_matrix(a, b, eps=1e-8): """ added eps for numerical stability """ - b_n = b.norm(dim=1)[:, None] - a_norm = a / torch.max(b_n, eps * torch.ones_like(b_n)) + a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None] + a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n)) b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n)) sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1)) return sim_mt From f54cf6c1f082f053b40305da8d7b59afec66e6dd Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 11:41:14 -0800 Subject: [PATCH 03/13] print most similar tokens during ti phase --- lora_diffusion/cli_lora_pti.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index bc402fa..77b062c 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -77,7 +77,18 @@ def sim_matrix(a, b, eps=1e-8): sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1)) return sim_mt -def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder): +def compute_pairwise_distances(x, y): + # compute the L2 distance of each row in x to each row in y (both are torch tensors) + n = x.size(0) + m = y.size(0) + assert x.size(1) == y.size(1) + + x = x.unsqueeze(1).expand(n, m, x.size(1)) + y = y.unsqueeze(0).expand(n, m, x.size(1)) + + return torch.pow(x - y, 2).sum(2) + +def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder, n=5): # get all the token embeddings: token_embeds = text_encoder.get_input_embeddings().weight.data @@ -85,10 +96,18 @@ def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder): similarity = sim_matrix(optimized_tokens, token_embeds).squeeze() similarity = similarity.cpu().numpy() + distances = compute_pairwise_distances(optimized_tokens, token_embeds).squeeze() + distances = distances.cpu().numpy() + # print similarity for the most similar tokens: most_similar_tokens = np.argsort(similarity)[::-1] - for token_id in most_similar_tokens[:5]: - print(f"{tokenizer.decode(token_id)}: {similarity[token_id]:.4f}") + # print embedding of most similar token: + embd = token_embeds[most_similar_tokens[0]] + + #print(f"Embedding of token: {embd[:].cpu().detach().numpy()}") + print(f"--- Most similar tokens to {tokenizer.decode(most_similar_tokens[0])}:") + for token_id in most_similar_tokens[1:n+1]: + print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"") def get_models( @@ -540,8 +559,8 @@ def train_inversion( index_no_updates ] = orig_embeds_params[index_no_updates] - if global_step % 10 == 0: - print("----------------------") + if global_step % 20 == 0: + print("------------------------------") for i, t in enumerate(optimizing_embeds): print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}") print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder) From 8f2c7b8c8493dff79f2fcc717df140fded5dfe12 Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 11:55:52 -0800 Subject: [PATCH 04/13] cleanup prints --- lora_diffusion/cli_lora_pti.py | 61 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 77b062c..6432d7f 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -77,37 +77,41 @@ def sim_matrix(a, b, eps=1e-8): sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1)) return sim_mt -def compute_pairwise_distances(x, y): + +def compute_pairwise_distances(x,y): # compute the L2 distance of each row in x to each row in y (both are torch tensors) - n = x.size(0) - m = y.size(0) - assert x.size(1) == y.size(1) + # x is a torch tensor of shape (m, d) + # y is a torch tensor of shape (n, d) + # returns a torch tensor of shape (m, n) + + n = y.shape[0] + m = x.shape[0] + d = x.shape[1] - x = x.unsqueeze(1).expand(n, m, x.size(1)) - y = y.unsqueeze(0).expand(n, m, x.size(1)) + x = x.unsqueeze(1).expand(m, n, d) + y = y.unsqueeze(0).expand(m, n, d) return torch.pow(x - y, 2).sum(2) -def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder, n=5): - # get all the token embeddings: - token_embeds = text_encoder.get_input_embeddings().weight.data - # Compute the cosine-similarity between the optimized tokens and all the other tokens - similarity = sim_matrix(optimized_tokens, token_embeds).squeeze() - similarity = similarity.cpu().numpy() +def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10): + with torch.no_grad(): + # get all the token embeddings: + token_embeds = text_encoder.get_input_embeddings().weight.data + + # Compute the cosine-similarity between the optimized tokens and all the other tokens + similarity = sim_matrix(optimized_token.unsqueeze(0), token_embeds).squeeze() + similarity = similarity.detach().cpu().numpy() + + distances = compute_pairwise_distances(optimized_token.unsqueeze(0), token_embeds).squeeze() + distances = distances.detach().cpu().numpy() - distances = compute_pairwise_distances(optimized_tokens, token_embeds).squeeze() - distances = distances.cpu().numpy() + # print similarity for the most similar tokens: + most_similar_tokens = np.argsort(similarity)[::-1] - # print similarity for the most similar tokens: - most_similar_tokens = np.argsort(similarity)[::-1] - # print embedding of most similar token: - embd = token_embeds[most_similar_tokens[0]] - - #print(f"Embedding of token: {embd[:].cpu().detach().numpy()}") - print(f"--- Most similar tokens to {tokenizer.decode(most_similar_tokens[0])}:") - for token_id in most_similar_tokens[1:n+1]: - print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"") + print(f"{tokenizer.decode(most_similar_tokens[0])} --> mean: {optimized_token.mean().item():.3f}, std: {optimized_token.std().item():.3f}, norm: {optimized_token.norm():.4f}") + for token_id in most_similar_tokens[1:n+1]: + print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"") def get_models( @@ -559,11 +563,10 @@ def train_inversion( index_no_updates ] = orig_embeds_params[index_no_updates] - if global_step % 20 == 0: + if global_step % 50 == 0: print("------------------------------") for i, t in enumerate(optimizing_embeds): - print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}") - print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder) + print_most_similar_tokens(tokenizer, t, text_encoder) global_step += 1 progress_bar.update(1) @@ -728,6 +731,12 @@ def perform_tuning( index_no_updates ] = orig_embeds_params[index_no_updates] + if global_step % 100 == 0: + optimizing_embeds = text_encoder.get_input_embeddings().weight[~index_no_updates] + print("------------------------------") + for i, t in enumerate(optimizing_embeds): + print_most_similar_tokens(tokenizer, t, text_encoder) + global_step += 1 From 4f5516118dad3279832cfbad3ca673bfbcd7e39d Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 12:03:28 -0800 Subject: [PATCH 05/13] cleanup prints --- lora_diffusion/cli_lora_pti.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 6432d7f..c86054c 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -523,12 +523,13 @@ def train_inversion( if global_step % accum_iter == 0: # print gradient of text encoder embedding - print( - text_encoder.get_input_embeddings() - .weight.grad[index_updates, :] - .norm(dim=-1) - .mean() - ) + if 0: + print( + text_encoder.get_input_embeddings() + .weight.grad[index_updates, :] + .norm(dim=-1) + .mean() + ) optimizer.step() optimizer.zero_grad() From edeef64b1dd210ee34b39aee867bcadeda809088 Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 14 Feb 2023 18:52:28 -0800 Subject: [PATCH 06/13] minor changes --- lora_diffusion/cli_lora_pti.py | 3 +-- lora_diffusion/dataset.py | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index c86054c..5f90718 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -632,7 +632,7 @@ def train_inversion( return import matplotlib.pyplot as plt -def plot_loss_curve(losses, name, moving_avg=20): +def plot_loss_curve(losses, name, moving_avg=5): losses = np.array(losses) losses = np.convolve(losses, np.ones(moving_avg)/moving_avg, mode='valid') plt.plot(losses) @@ -1097,7 +1097,6 @@ def train( [el.numel() for el in itertools.chain(*unet_lora_params)] ) print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params) - print(f"PTI : has {len(unet_lora_params)} lora") print("PTI : Before training:") inspect_lora(unet) diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py index e51c301..2f7abac 100644 --- a/lora_diffusion/dataset.py +++ b/lora_diffusion/dataset.py @@ -44,6 +44,7 @@ "{}", "a picture of {}", "a closeup of {}", + "a closeup of {}'s face", "a closeup photo of {}", "a close-up picture of {}", "a photo of {}", @@ -60,6 +61,7 @@ "{} is having fun, 4k photograph", "{} wearing a plaidered shirt standing next to another person", "smiling {} in a hoodie and sweater", + "{} smiling at the camera", "a photo of the cool {}", "a close-up photo of {}", "a bright photo of {}", From 82264dea170bb1aeb22cbce15efbeeb422a7b6da Mon Sep 17 00:00:00 2001 From: xander Date: Wed, 15 Feb 2023 14:42:15 -0800 Subject: [PATCH 07/13] big bugfix + allow for loading pretrained textual inversion embeddings --- lora_diffusion/cli_lora_pti.py | 80 +++++++++++++++++++++++----------- lora_diffusion/dataset.py | 6 +-- lora_diffusion/lora.py | 11 +++-- 3 files changed, 63 insertions(+), 34 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 5f90718..e748c9e 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -46,6 +46,8 @@ prepare_clip_model_sets, evaluate_pipe, UNET_EXTENDED_TARGET_REPLACE, + parse_safeloras_embeds, + apply_learned_embed_in_clip, ) def preview_training_batch(train_dataloader, mode, n_imgs = 40): @@ -586,7 +588,7 @@ def train_inversion( placeholder_token_ids=placeholder_token_ids, placeholder_tokens=placeholder_tokens, save_path=os.path.join( - save_path, f"step_inv_{global_step}.safetensors" + save_path, f"step_inv_{global_step:04d}.safetensors" ), save_lora=False, ) @@ -751,7 +753,7 @@ def perform_tuning( placeholder_token_ids=placeholder_token_ids, placeholder_tokens=placeholder_tokens, save_path=os.path.join( - save_path, f"step_{global_step}.safetensors" + save_path, f"step_{global_step:04d}.safetensors" ), target_replace_module_text=lora_clip_target_modules, target_replace_module_unet=lora_unet_target_modules, @@ -761,8 +763,8 @@ def perform_tuning( .mean() .item() ) - print("LORA Unet Moved", moved) + moved = ( torch.tensor( list(itertools.chain(*inspect_lora(text_encoder).values())) @@ -770,7 +772,6 @@ def perform_tuning( .mean() .item() ) - print("LORA CLIP Moved", moved) if log_wandb: @@ -833,6 +834,7 @@ def train( placeholder_tokens: str = "", placeholder_token_at_data: Optional[str] = None, initializer_tokens: Optional[str] = None, + load_pretrained_inversion_embeddings_path: Optional[str] = None, seed: int = 42, resolution: int = 512, color_jitter: bool = True, @@ -880,6 +882,9 @@ def train( script_start_time = time.time() torch.manual_seed(seed) + lora_rank_unet = lora_rank + lora_rank_text_encoder = lora_rank + if use_template == "person" and not use_face_segmentation_condition: print("### WARNING ### : Using person template without face segmentation condition") print("When training people, it is highly recommended to use face segmentation condition!!") @@ -900,7 +905,7 @@ def train( if output_dir is not None: os.makedirs(output_dir, exist_ok=True) - # print(placeholder_tokens, initializer_tokens) + if len(placeholder_tokens) == 0: placeholder_tokens = [] print("PTI : Placeholder Tokens not given, using null token") @@ -933,6 +938,7 @@ def train( print("PTI : Placeholder Tokens", placeholder_tokens) print("PTI : Initializer Tokens", initializer_tokens) + print("PTI : Token Map: ", token_map) # get the models text_encoder, vae, unet, tokenizer, placeholder_token_ids = get_models( @@ -984,8 +990,6 @@ def train( train_inpainting=train_inpainting, ) - train_dataset.blur_amount = 200 - if train_inpainting: assert not cached_latents, "Cached latents not supported for inpainting" @@ -1022,7 +1026,7 @@ def train( vae = None # STEP 1 : Perform Inversion - if perform_inversion and not cached_latents: + if perform_inversion and not cached_latents and (load_pretrained_inversion_embeddings_path is None): preview_training_batch(train_dataloader, "inversion") print("PTI : Performing Inversion") @@ -1073,16 +1077,32 @@ def train( del ti_optimizer print("############### Inversion Done ###############") + elif load_pretrained_inversion_embeddings_path is not None: + + print("PTI : Loading pretrained inversion embeddings..") + from safetensors.torch import safe_open + # Load the pretrained embeddings from the lora file: + safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu") + #monkeypatch_or_replace_safeloras(pipe, safeloras) + tok_dict = parse_safeloras_embeds(safeloras) + apply_learned_embed_in_clip( + tok_dict, + text_encoder, + tokenizer, + idempotent=True, + ) + # Next perform Tuning with LoRA: if not use_extended_lora: unet_lora_params, _ = inject_trainable_lora( unet, - r=lora_rank, + r=lora_rank_unet, target_replace_module=lora_unet_target_modules, dropout_p=lora_dropout_p, scale=lora_scale, ) print("PTI : not use_extended_lora...") + print("PTI : Will replace modules: ", lora_unet_target_modules) else: print("PTI : USING EXTENDED UNET!!!") lora_unet_target_modules = ( @@ -1090,16 +1110,11 @@ def train( ) print("PTI : Will replace modules: ", lora_unet_target_modules) unet_lora_params, _ = inject_trainable_lora_extended( - unet, r=lora_rank, target_replace_module=lora_unet_target_modules + unet, r=lora_rank_unet, target_replace_module=lora_unet_target_modules ) - n_optimizable_unet_params = sum( - [el.numel() for el in itertools.chain(*unet_lora_params)] - ) - print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params) - print(f"PTI : has {len(unet_lora_params)} lora") - print("PTI : Before training:") - inspect_lora(unet) + #n_optimizable_unet_params = sum([el.numel() for el in itertools.chain(*unet_lora_params)]) + #print("PTI : Number of optimizable UNET parameters: ", n_optimizable_unet_params) params_to_optimize = [ {"params": itertools.chain(*unet_lora_params), "lr": unet_lr}, @@ -1131,15 +1146,15 @@ def train( text_encoder_lora_params, _ = inject_trainable_lora( text_encoder, target_replace_module=lora_clip_target_modules, - r=lora_rank, + r=lora_rank_text_encoder, ) params_to_optimize += [ - { - "params": itertools.chain(*text_encoder_lora_params), - "lr": text_encoder_lr, - } + {"params": itertools.chain(*text_encoder_lora_params), + "lr": text_encoder_lr} ] - inspect_lora(text_encoder) + + #n_optimizable_text_Encoder_params = sum( [el.numel() for el in itertools.chain(*text_encoder_lora_params)]) + #print("PTI : Number of optimizable text-encoder parameters: ", n_optimizable_text_Encoder_params) lora_optimizers = optim.AdamW(params_to_optimize, weight_decay=weight_decay_lora) @@ -1148,8 +1163,6 @@ def train( print("Training text encoder!") text_encoder.train() - train_dataset.blur_amount = 70 - lr_scheduler_lora = get_scheduler( lr_scheduler_lora, optimizer=lora_optimizers, @@ -1159,6 +1172,22 @@ def train( if not cached_latents: preview_training_batch(train_dataloader, "tuning") + #print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params) + print(f"PTI : has {len(unet_lora_params)} lora") + print("PTI : Before training:") + + moved = ( + torch.tensor(list(itertools.chain(*inspect_lora(unet).values()))) + .mean().item()) + print(f"LORA Unet Moved {moved:.6f}") + + + moved = ( + torch.tensor( + list(itertools.chain(*inspect_lora(text_encoder).values())) + ).mean().item()) + print(f"LORA CLIP Moved {moved:.6f}") + perform_tuning( unet, vae, @@ -1190,6 +1219,7 @@ def train( training_time = time.time() - script_start_time print(f"Training time: {training_time/60:.1f} minutes") args_dict["training_time_s"] = int(training_time) + args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader)) # Save the args_dict to the output directory as a json file: with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f: diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py index 2f7abac..f566c83 100644 --- a/lora_diffusion/dataset.py +++ b/lora_diffusion/dataset.py @@ -207,8 +207,7 @@ def __init__( resize=True, use_mask_captioned_data=False, use_face_segmentation_condition=False, - train_inpainting=False, - blur_amount: int = 70, + train_inpainting=False ): self.size = size self.tokenizer = tokenizer @@ -341,8 +340,6 @@ def __init__( ] ) - self.blur_amount = blur_amount - print("Captions:") print(self.captions) @@ -350,7 +347,6 @@ def tune_h_flip_prob(self, training_progress): if self.h_flip: # Tune the h_flip probability to be 0.5 training_progress is 0 and end_prob when training_progress is 1 self.h_flip_prob = 0.5 + (self.final_flip_prob - 0.5) * training_progress - print(f"h_flip_prob: {self.h_flip_prob:.3f}") def __len__(self): return self._length diff --git a/lora_diffusion/lora.py b/lora_diffusion/lora.py index bc3c5d1..52bc829 100644 --- a/lora_diffusion/lora.py +++ b/lora_diffusion/lora.py @@ -4,6 +4,7 @@ from typing import Callable, Dict, List, Optional, Set, Tuple, Type, Union import numpy as np +import random import PIL import torch import torch.nn as nn @@ -801,7 +802,7 @@ def monkeypatch_or_replace_safeloras(models, safeloras): for name, (lora, ranks, target) in loras.items(): model = getattr(models, name, None) - + if not model: print(f"No model provided for {name}, contained in Lora") continue @@ -1028,17 +1029,19 @@ def inspect_lora(model): for name, _module in model.named_modules(): if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]: + # get the up and down weight matrices: ups = _module.lora_up.weight.data.clone() downs = _module.lora_down.weight.data.clone() - + + # flatten and compute dot product: wght: torch.Tensor = ups.flatten(1) @ downs.flatten(1) - + # get the mean of the absolute value of the dot product: dist = wght.flatten().abs().mean().item() + if name in moved: moved[name].append(dist) else: moved[name] = [dist] - return moved From f7471d40c0a3ea407f318fe6c66f51c4a54b4e90 Mon Sep 17 00:00:00 2001 From: xander Date: Thu, 16 Feb 2023 21:07:58 -0800 Subject: [PATCH 08/13] add grid_search script --- grid_train_lora.py | 130 +++++++++++++++++++++++++++++++++ lora_diffusion/cli_lora_pti.py | 63 +++++++++++++--- run_segment.py | 85 +++++++++++++++++++++ 3 files changed, 266 insertions(+), 12 deletions(-) create mode 100644 grid_train_lora.py create mode 100644 run_segment.py diff --git a/grid_train_lora.py b/grid_train_lora.py new file mode 100644 index 0000000..1a0a6f5 --- /dev/null +++ b/grid_train_lora.py @@ -0,0 +1,130 @@ +import itertools +import os +import random +import time + +def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None): + if seed is not None: + random.seed(seed) + else: + random.seed(int(time.time())) + + # Split the parameter grid into fixed and variable arguments + fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))} + variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args} + + # Generate all combinations of variable arguments + variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()])) + variable_keys = list(variable_args.keys()) + + # Generate a long list of grid_values by randomly sampling each argument list + long_grid_values = [] + for i in range(10000): + values = {} + for k in variable_keys: + if isinstance(param_grid[k], list): + values[k] = random.choice(param_grid[k]) + else: + values[k] = param_grid[k] + long_grid_values.append(values) + + # Randomly sample a subset of the long list of grid_values + grid_values = random.sample(long_grid_values, n) + + # Combine fixed and variable arguments into a single dictionary + grid_values = [{**fixed_args, **values} for values in grid_values] + + # Define the command to execute your Python job with input arguments + cmd = 'python lora_diffusion/cli_lora_pti.py' + + # shuffle the grid values ordering: + random.shuffle(grid_values) + already_done = [] + + # Loop over the grid values and execute the Python job with each combination of input arguments + for i, values in enumerate(grid_values[:n]): + if values in already_done: #This combo has already been tried, skip.. + continue + + already_done.append(values.copy()) + + # get the datadirectory name: + data_dir = "_".join(values['instance_data_dir'].split('/')[-2:]) + + # generate a short, pseudorandom character id for this run: + id_str = ''.join(random.choice('0123456789abcdef') for i in range(6)) + + values['output_dir'] = f"./exps/{dirname}/{data_dir}_{i:02d}_{id_str}" + + arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()]) + full_cmd = f'{cmd} {arg_str}' + print('------------------------------------------') + print(f'Running command: {i+1}/{n}') + + # pretty print the values dictionary: + for k, v in values.items(): + print(f'{k}:{" "*(50-len(k))}{v}') + + if not test: + os.system(full_cmd) + + +""" + +export CUDA_VISIBLE_DEVICES=3 +conda activate diffusers +cd /home/xander/Projects/cog/lora +python grid_train_lora.py + + +'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train", +'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train_one", +'instance_data_dir': "/home/xander/Pictures/Mars2023/people/niko/train", +'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train", + + +""" + +n_to_run = 50 + +param_grid = { + 'pretrained_model_name_or_path': ['dreamlike-art/dreamlike-photoreal-2.0'], + 'instance_data_dir': "/home/xander/Pictures/Mars2023/people/ready/xander/train", + + 'train_text_encoder': True, + 'perform_inversion': True, + 'learning_rate_ti': [1e-4, 2.5e-4], + 'continue_inversion': True, + 'continue_inversion_lr': [0.5e-5, 2e-5, 1e-4], + 'learning_rate_unet': [1.0e-5, 2.5e-5], + 'learning_rate_text': [1.0e-5, 2.5e-5], + 'save_steps': 50, + 'max_train_steps_ti': [200, 300, 400], + 'max_train_steps_tuning': [300, 450, 600], + 'weight_decay_ti': [0.001, 0.005], + 'weight_decay_lora': [0.0001, 0.001], + 'lora_rank_unet': [1,2,4], + 'lora_rank_text_encoder': [1,4,8,16], + 'use_extended_lora': [False, True], + + 'use_face_segmentation_condition': True, + 'use_mask_captioned_data': False, + 'placeholder_tokens': "\"|\"", + 'proxy_token': "person", + 'use_template': "person", + 'clip_ti_decay': True, + + 'cached_latents': False, + 'train_batch_size': 4, + 'gradient_accumulation_steps': 1, + 'color_jitter': True, + 'scale_lr': True, + 'lr_scheduler': "linear", + 'lr_warmup_steps': 0, + + 'resolution': 512, + 'enable_xformers_memory_efficient_attention': True, + +} + +run_lora_experiment(param_grid, n=n_to_run) \ No newline at end of file diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index e748c9e..0f8cf75 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -96,7 +96,7 @@ def compute_pairwise_distances(x,y): return torch.pow(x - y, 2).sum(2) -def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10): +def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=5): with torch.no_grad(): # get all the token embeddings: token_embeds = text_encoder.get_input_embeddings().weight.data @@ -334,6 +334,24 @@ def collate_fn(examples): return train_dataloader +def get_lora_norm(model): + norm, n_elements = 0, 0 + + for name, _module in model.named_modules(): + if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]: + # get the up and down weight matrices: + ups = _module.lora_up.weight.data + downs = _module.lora_down.weight.data + + # flatten and compute dot product: + wght = (ups.flatten(1) @ downs.flatten(1)).flatten() + + # add to the total norm: + norm += wght.abs().sum() + n_elements += wght.shape[0] + + return norm / n_elements + def loss_step( batch, @@ -452,6 +470,15 @@ def loss_step( embedding_norm_loss = (embedding_norm - target_norm)**2 loss += 0.005*embedding_norm_loss + if 0: #disable norm regularization for now + unet_norm = get_lora_norm(unet) + text_encoder_norm = get_lora_norm(text_encoder) + print(f"text_encoder norm: {text_encoder_norm.item():.6f}, unet_norm: {unet_norm.item():.6f}", ) + norm_loss = 0.5 * unet_norm + 0.5 * text_encoder_norm + norm_loss_f = 0.0 + print(f"loss: {loss.item():.6f}, norm_loss: {norm_loss.item():.6f}, norm_loss_f = {norm_loss_f:.1f}") + loss += norm_loss_f * norm_loss + return loss @@ -763,7 +790,7 @@ def perform_tuning( .mean() .item() ) - print("LORA Unet Moved", moved) + print(f"LORA Unet Moved {moved:.6f}") moved = ( torch.tensor( @@ -772,7 +799,7 @@ def perform_tuning( .mean() .item() ) - print("LORA CLIP Moved", moved) + print(f"LORA CLIP Moved {moved:.6f}") if log_wandb: with torch.no_grad(): @@ -845,7 +872,8 @@ def train( save_steps: int = 100, gradient_accumulation_steps: int = 4, gradient_checkpointing: bool = False, - lora_rank: int = 4, + lora_rank_unet: int = 4, + lora_rank_text_encoder: int = 4, lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"}, lora_clip_target_modules={"CLIPAttention"}, lora_dropout_p: float = 0.0, @@ -882,9 +910,6 @@ def train( script_start_time = time.time() torch.manual_seed(seed) - lora_rank_unet = lora_rank - lora_rank_text_encoder = lora_rank - if use_template == "person" and not use_face_segmentation_condition: print("### WARNING ### : Using person template without face segmentation condition") print("When training people, it is highly recommended to use face segmentation condition!!") @@ -1079,7 +1104,7 @@ def train( elif load_pretrained_inversion_embeddings_path is not None: - print("PTI : Loading pretrained inversion embeddings..") + print(f"PTI : Loading pretrained inversion embeddings from {load_pretrained_inversion_embeddings_path}...") from safetensors.torch import safe_open # Load the pretrained embeddings from the lora file: safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu") @@ -1176,17 +1201,17 @@ def train( print(f"PTI : has {len(unet_lora_params)} lora") print("PTI : Before training:") - moved = ( + unet_moved = ( torch.tensor(list(itertools.chain(*inspect_lora(unet).values()))) .mean().item()) - print(f"LORA Unet Moved {moved:.6f}") + print(f"LORA Unet Moved {unet_moved:.6f}") - moved = ( + clip_moved = ( torch.tensor( list(itertools.chain(*inspect_lora(text_encoder).values())) ).mean().item()) - print(f"LORA CLIP Moved {moved:.6f}") + print(f"LORA CLIP Moved {clip_moved:.6f}") perform_tuning( unet, @@ -1215,11 +1240,25 @@ def train( train_inpainting=train_inpainting, ) + unet_moved = ( + torch.tensor(list(itertools.chain(*inspect_lora(unet).values()))) + .mean().item()) + print(f"LORA Unet Moved {unet_moved:.6f}") + + clip_moved = ( + torch.tensor( + list(itertools.chain(*inspect_lora(text_encoder).values())) + ).mean().item()) + print(f"LORA CLIP Moved {clip_moved:.6f}") + + print("############### Tuning Done ###############") training_time = time.time() - script_start_time print(f"Training time: {training_time/60:.1f} minutes") args_dict["training_time_s"] = int(training_time) args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader)) + args_dict["unet_moved"] = unet_moved + args_dict["clip_moved"] = clip_moved # Save the args_dict to the output directory as a json file: with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f: diff --git a/run_segment.py b/run_segment.py new file mode 100644 index 0000000..4323cdc --- /dev/null +++ b/run_segment.py @@ -0,0 +1,85 @@ +import itertools +import os +import random +import time + +def run_lora_experiment(param_grid, cmd, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None): + if seed is not None: + random.seed(seed) + else: + random.seed(int(time.time())) + + # Split the parameter grid into fixed and variable arguments + fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))} + variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args} + + # Generate all combinations of variable arguments + variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()])) + variable_keys = list(variable_args.keys()) + + # Generate a long list of grid_values by randomly sampling each argument list + long_grid_values = [] + for i in range(10000): + values = {} + for k in variable_keys: + if isinstance(param_grid[k], list): + values[k] = random.choice(param_grid[k]) + else: + values[k] = param_grid[k] + long_grid_values.append(values) + + # Randomly sample a subset of the long list of grid_values + grid_values = random.sample(long_grid_values, n) + + # Combine fixed and variable arguments into a single dictionary + grid_values = [{**fixed_args, **values} for values in grid_values] + + # shuffle the grid values ordering: + random.shuffle(grid_values) + already_done = [] + + # Loop over the grid values and execute the Python job with each combination of input arguments + for i, values in enumerate(grid_values[:n]): + if values in already_done: #This combo has already been tried, skip.. + continue + + already_done.append(values.copy()) + + arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()]) + full_cmd = f'{cmd} {arg_str}' + print('------------------------------------------') + print(f'Running command: {i+1}/{n}') + + # pretty print the values dictionary: + for k, v in values.items(): + print(f'{k}:{" "*(50-len(k))}{v}') + + if not test: + os.system(full_cmd) + + +""" + +export CUDA_VISIBLE_DEVICES=2 +conda activate diffusers +cd /home/xander/Projects/cog/lora +python run_segment.py + +""" + +python_cmd = "python lora_diffusion/preprocess_files.py" +input_dir = "/home/xander/Pictures/Mars2023/people/run_segment" + +for subdir in sorted(os.listdir(input_dir)): + + full_input_dir = os.path.join(input_dir, subdir) + "/imgs" + output_dir = os.path.join(input_dir, subdir) + "/train" + + param_grid = { + 'files': full_input_dir, + 'output_dir': output_dir, + 'target_prompts': "face", + 'target_size': 512, + } + + run_lora_experiment(param_grid, python_cmd) \ No newline at end of file From c8b00f46089a1a6ca59988d7476de0678b036235 Mon Sep 17 00:00:00 2001 From: xander Date: Thu, 16 Feb 2023 21:10:54 -0800 Subject: [PATCH 09/13] clean --- grid_train_lora.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/grid_train_lora.py b/grid_train_lora.py index 1a0a6f5..fbb3ed1 100644 --- a/grid_train_lora.py +++ b/grid_train_lora.py @@ -3,7 +3,7 @@ import random import time -def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None): +def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "grid_search_results", seed = None): if seed is not None: random.seed(seed) else: @@ -69,22 +69,6 @@ def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_searc os.system(full_cmd) -""" - -export CUDA_VISIBLE_DEVICES=3 -conda activate diffusers -cd /home/xander/Projects/cog/lora -python grid_train_lora.py - - -'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train", -'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train_one", -'instance_data_dir': "/home/xander/Pictures/Mars2023/people/niko/train", -'instance_data_dir': "/home/xander/Pictures/Mars2023/people/gene/train", - - -""" - n_to_run = 50 param_grid = { From a5c3d2ecb92df819c659585974a417c38977f5d2 Mon Sep 17 00:00:00 2001 From: xander Date: Thu, 16 Feb 2023 21:14:40 -0800 Subject: [PATCH 10/13] Remove grid_train_lora.py --- grid_train_lora.py | 114 --------------------------------- lora_diffusion/cli_lora_pti.py | 63 ++++-------------- run_segment.py | 85 ------------------------ 3 files changed, 12 insertions(+), 250 deletions(-) delete mode 100644 grid_train_lora.py delete mode 100644 run_segment.py diff --git a/grid_train_lora.py b/grid_train_lora.py deleted file mode 100644 index fbb3ed1..0000000 --- a/grid_train_lora.py +++ /dev/null @@ -1,114 +0,0 @@ -import itertools -import os -import random -import time - -def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "grid_search_results", seed = None): - if seed is not None: - random.seed(seed) - else: - random.seed(int(time.time())) - - # Split the parameter grid into fixed and variable arguments - fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))} - variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args} - - # Generate all combinations of variable arguments - variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()])) - variable_keys = list(variable_args.keys()) - - # Generate a long list of grid_values by randomly sampling each argument list - long_grid_values = [] - for i in range(10000): - values = {} - for k in variable_keys: - if isinstance(param_grid[k], list): - values[k] = random.choice(param_grid[k]) - else: - values[k] = param_grid[k] - long_grid_values.append(values) - - # Randomly sample a subset of the long list of grid_values - grid_values = random.sample(long_grid_values, n) - - # Combine fixed and variable arguments into a single dictionary - grid_values = [{**fixed_args, **values} for values in grid_values] - - # Define the command to execute your Python job with input arguments - cmd = 'python lora_diffusion/cli_lora_pti.py' - - # shuffle the grid values ordering: - random.shuffle(grid_values) - already_done = [] - - # Loop over the grid values and execute the Python job with each combination of input arguments - for i, values in enumerate(grid_values[:n]): - if values in already_done: #This combo has already been tried, skip.. - continue - - already_done.append(values.copy()) - - # get the datadirectory name: - data_dir = "_".join(values['instance_data_dir'].split('/')[-2:]) - - # generate a short, pseudorandom character id for this run: - id_str = ''.join(random.choice('0123456789abcdef') for i in range(6)) - - values['output_dir'] = f"./exps/{dirname}/{data_dir}_{i:02d}_{id_str}" - - arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()]) - full_cmd = f'{cmd} {arg_str}' - print('------------------------------------------') - print(f'Running command: {i+1}/{n}') - - # pretty print the values dictionary: - for k, v in values.items(): - print(f'{k}:{" "*(50-len(k))}{v}') - - if not test: - os.system(full_cmd) - - -n_to_run = 50 - -param_grid = { - 'pretrained_model_name_or_path': ['dreamlike-art/dreamlike-photoreal-2.0'], - 'instance_data_dir': "/home/xander/Pictures/Mars2023/people/ready/xander/train", - - 'train_text_encoder': True, - 'perform_inversion': True, - 'learning_rate_ti': [1e-4, 2.5e-4], - 'continue_inversion': True, - 'continue_inversion_lr': [0.5e-5, 2e-5, 1e-4], - 'learning_rate_unet': [1.0e-5, 2.5e-5], - 'learning_rate_text': [1.0e-5, 2.5e-5], - 'save_steps': 50, - 'max_train_steps_ti': [200, 300, 400], - 'max_train_steps_tuning': [300, 450, 600], - 'weight_decay_ti': [0.001, 0.005], - 'weight_decay_lora': [0.0001, 0.001], - 'lora_rank_unet': [1,2,4], - 'lora_rank_text_encoder': [1,4,8,16], - 'use_extended_lora': [False, True], - - 'use_face_segmentation_condition': True, - 'use_mask_captioned_data': False, - 'placeholder_tokens': "\"|\"", - 'proxy_token': "person", - 'use_template': "person", - 'clip_ti_decay': True, - - 'cached_latents': False, - 'train_batch_size': 4, - 'gradient_accumulation_steps': 1, - 'color_jitter': True, - 'scale_lr': True, - 'lr_scheduler': "linear", - 'lr_warmup_steps': 0, - - 'resolution': 512, - 'enable_xformers_memory_efficient_attention': True, - -} - -run_lora_experiment(param_grid, n=n_to_run) \ No newline at end of file diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 0f8cf75..e748c9e 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -96,7 +96,7 @@ def compute_pairwise_distances(x,y): return torch.pow(x - y, 2).sum(2) -def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=5): +def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10): with torch.no_grad(): # get all the token embeddings: token_embeds = text_encoder.get_input_embeddings().weight.data @@ -334,24 +334,6 @@ def collate_fn(examples): return train_dataloader -def get_lora_norm(model): - norm, n_elements = 0, 0 - - for name, _module in model.named_modules(): - if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]: - # get the up and down weight matrices: - ups = _module.lora_up.weight.data - downs = _module.lora_down.weight.data - - # flatten and compute dot product: - wght = (ups.flatten(1) @ downs.flatten(1)).flatten() - - # add to the total norm: - norm += wght.abs().sum() - n_elements += wght.shape[0] - - return norm / n_elements - def loss_step( batch, @@ -470,15 +452,6 @@ def loss_step( embedding_norm_loss = (embedding_norm - target_norm)**2 loss += 0.005*embedding_norm_loss - if 0: #disable norm regularization for now - unet_norm = get_lora_norm(unet) - text_encoder_norm = get_lora_norm(text_encoder) - print(f"text_encoder norm: {text_encoder_norm.item():.6f}, unet_norm: {unet_norm.item():.6f}", ) - norm_loss = 0.5 * unet_norm + 0.5 * text_encoder_norm - norm_loss_f = 0.0 - print(f"loss: {loss.item():.6f}, norm_loss: {norm_loss.item():.6f}, norm_loss_f = {norm_loss_f:.1f}") - loss += norm_loss_f * norm_loss - return loss @@ -790,7 +763,7 @@ def perform_tuning( .mean() .item() ) - print(f"LORA Unet Moved {moved:.6f}") + print("LORA Unet Moved", moved) moved = ( torch.tensor( @@ -799,7 +772,7 @@ def perform_tuning( .mean() .item() ) - print(f"LORA CLIP Moved {moved:.6f}") + print("LORA CLIP Moved", moved) if log_wandb: with torch.no_grad(): @@ -872,8 +845,7 @@ def train( save_steps: int = 100, gradient_accumulation_steps: int = 4, gradient_checkpointing: bool = False, - lora_rank_unet: int = 4, - lora_rank_text_encoder: int = 4, + lora_rank: int = 4, lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"}, lora_clip_target_modules={"CLIPAttention"}, lora_dropout_p: float = 0.0, @@ -910,6 +882,9 @@ def train( script_start_time = time.time() torch.manual_seed(seed) + lora_rank_unet = lora_rank + lora_rank_text_encoder = lora_rank + if use_template == "person" and not use_face_segmentation_condition: print("### WARNING ### : Using person template without face segmentation condition") print("When training people, it is highly recommended to use face segmentation condition!!") @@ -1104,7 +1079,7 @@ def train( elif load_pretrained_inversion_embeddings_path is not None: - print(f"PTI : Loading pretrained inversion embeddings from {load_pretrained_inversion_embeddings_path}...") + print("PTI : Loading pretrained inversion embeddings..") from safetensors.torch import safe_open # Load the pretrained embeddings from the lora file: safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu") @@ -1201,17 +1176,17 @@ def train( print(f"PTI : has {len(unet_lora_params)} lora") print("PTI : Before training:") - unet_moved = ( + moved = ( torch.tensor(list(itertools.chain(*inspect_lora(unet).values()))) .mean().item()) - print(f"LORA Unet Moved {unet_moved:.6f}") + print(f"LORA Unet Moved {moved:.6f}") - clip_moved = ( + moved = ( torch.tensor( list(itertools.chain(*inspect_lora(text_encoder).values())) ).mean().item()) - print(f"LORA CLIP Moved {clip_moved:.6f}") + print(f"LORA CLIP Moved {moved:.6f}") perform_tuning( unet, @@ -1240,25 +1215,11 @@ def train( train_inpainting=train_inpainting, ) - unet_moved = ( - torch.tensor(list(itertools.chain(*inspect_lora(unet).values()))) - .mean().item()) - print(f"LORA Unet Moved {unet_moved:.6f}") - - clip_moved = ( - torch.tensor( - list(itertools.chain(*inspect_lora(text_encoder).values())) - ).mean().item()) - print(f"LORA CLIP Moved {clip_moved:.6f}") - - print("############### Tuning Done ###############") training_time = time.time() - script_start_time print(f"Training time: {training_time/60:.1f} minutes") args_dict["training_time_s"] = int(training_time) args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader)) - args_dict["unet_moved"] = unet_moved - args_dict["clip_moved"] = clip_moved # Save the args_dict to the output directory as a json file: with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f: diff --git a/run_segment.py b/run_segment.py deleted file mode 100644 index 4323cdc..0000000 --- a/run_segment.py +++ /dev/null @@ -1,85 +0,0 @@ -import itertools -import os -import random -import time - -def run_lora_experiment(param_grid, cmd, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None): - if seed is not None: - random.seed(seed) - else: - random.seed(int(time.time())) - - # Split the parameter grid into fixed and variable arguments - fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))} - variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args} - - # Generate all combinations of variable arguments - variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()])) - variable_keys = list(variable_args.keys()) - - # Generate a long list of grid_values by randomly sampling each argument list - long_grid_values = [] - for i in range(10000): - values = {} - for k in variable_keys: - if isinstance(param_grid[k], list): - values[k] = random.choice(param_grid[k]) - else: - values[k] = param_grid[k] - long_grid_values.append(values) - - # Randomly sample a subset of the long list of grid_values - grid_values = random.sample(long_grid_values, n) - - # Combine fixed and variable arguments into a single dictionary - grid_values = [{**fixed_args, **values} for values in grid_values] - - # shuffle the grid values ordering: - random.shuffle(grid_values) - already_done = [] - - # Loop over the grid values and execute the Python job with each combination of input arguments - for i, values in enumerate(grid_values[:n]): - if values in already_done: #This combo has already been tried, skip.. - continue - - already_done.append(values.copy()) - - arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()]) - full_cmd = f'{cmd} {arg_str}' - print('------------------------------------------') - print(f'Running command: {i+1}/{n}') - - # pretty print the values dictionary: - for k, v in values.items(): - print(f'{k}:{" "*(50-len(k))}{v}') - - if not test: - os.system(full_cmd) - - -""" - -export CUDA_VISIBLE_DEVICES=2 -conda activate diffusers -cd /home/xander/Projects/cog/lora -python run_segment.py - -""" - -python_cmd = "python lora_diffusion/preprocess_files.py" -input_dir = "/home/xander/Pictures/Mars2023/people/run_segment" - -for subdir in sorted(os.listdir(input_dir)): - - full_input_dir = os.path.join(input_dir, subdir) + "/imgs" - output_dir = os.path.join(input_dir, subdir) + "/train" - - param_grid = { - 'files': full_input_dir, - 'output_dir': output_dir, - 'target_prompts': "face", - 'target_size': 512, - } - - run_lora_experiment(param_grid, python_cmd) \ No newline at end of file From 0f642d3814405728225d3b2239a1339dfe8e3d6a Mon Sep 17 00:00:00 2001 From: xander Date: Tue, 21 Feb 2023 13:21:54 -0800 Subject: [PATCH 11/13] separate lora ranks for unet and text_encoder --- lora_diffusion/cli_lora_pti.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index e748c9e..9dbb7fd 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -845,7 +845,8 @@ def train( save_steps: int = 100, gradient_accumulation_steps: int = 4, gradient_checkpointing: bool = False, - lora_rank: int = 4, + lora_rank_unet: int = 4, + lora_rank_text_encoder: int = 4, lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"}, lora_clip_target_modules={"CLIPAttention"}, lora_dropout_p: float = 0.0, @@ -882,9 +883,6 @@ def train( script_start_time = time.time() torch.manual_seed(seed) - lora_rank_unet = lora_rank - lora_rank_text_encoder = lora_rank - if use_template == "person" and not use_face_segmentation_condition: print("### WARNING ### : Using person template without face segmentation condition") print("When training people, it is highly recommended to use face segmentation condition!!") @@ -1219,7 +1217,8 @@ def train( training_time = time.time() - script_start_time print(f"Training time: {training_time/60:.1f} minutes") args_dict["training_time_s"] = int(training_time) - args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader)) + args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader.dataset)) + args_dict["n_training_imgs"] = len(train_dataloader.dataset) # Save the args_dict to the output directory as a json file: with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f: From d4d67403934d5ab02e7b81c08871ed9aea535fad Mon Sep 17 00:00:00 2001 From: xander Date: Sat, 25 Feb 2023 23:05:15 -0800 Subject: [PATCH 12/13] updates --- lora_diffusion/cli_lora_pti.py | 9 ++++-- lora_diffusion/dataset.py | 5 ++++ lora_diffusion/lora.py | 50 +++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 9dbb7fd..eb02efc 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -107,7 +107,7 @@ def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10): distances = compute_pairwise_distances(optimized_token.unsqueeze(0), token_embeds).squeeze() distances = distances.detach().cpu().numpy() - + # print similarity for the most similar tokens: most_similar_tokens = np.argsort(similarity)[::-1] @@ -187,11 +187,13 @@ def get_models( pretrained_vae_name_or_path or pretrained_model_name_or_path, subfolder=None if pretrained_vae_name_or_path else "vae", revision=None if pretrained_vae_name_or_path else revision, + local_files_only = True, ) unet = UNet2DConditionModel.from_pretrained( pretrained_model_name_or_path, subfolder="unet", revision=revision, + local_files_only = True, ) return ( @@ -199,7 +201,7 @@ def get_models( vae.to(device), unet.to(device), tokenizer, - placeholder_token_ids, + placeholder_token_ids ) @@ -949,7 +951,8 @@ def train( ) noise_scheduler = DDPMScheduler.from_config( - pretrained_model_name_or_path, subfolder="scheduler" + pretrained_model_name_or_path, subfolder="scheduler", + local_files_only = True, ) if gradient_checkpointing: diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py index f566c83..2306b72 100644 --- a/lora_diffusion/dataset.py +++ b/lora_diffusion/dataset.py @@ -313,6 +313,11 @@ def __init__( for idx in range(len(self.instance_images_path)): self.mask_path.append(f"{instance_data_root}/{idx}.mask.png") + # Final important variables for this dataset: + # self.instance_images_path + # self.mask_path + # self.captions + self.num_instance_images = len(self.instance_images_path) self.token_map = token_map diff --git a/lora_diffusion/lora.py b/lora_diffusion/lora.py index 52bc829..5919178 100644 --- a/lora_diffusion/lora.py +++ b/lora_diffusion/lora.py @@ -535,7 +535,6 @@ def convert_loras_to_safeloras( ): convert_loras_to_safeloras_with_embeds(modelmap=modelmap, outpath=outpath) - def parse_safeloras( safeloras, ) -> Dict[str, Tuple[List[nn.parameter.Parameter], List[int], List[str]]]: @@ -597,6 +596,55 @@ def parse_safeloras( return loras +def dict_to_lora(tensor_dict, metadata): + """ + Converts a dictionary of tensors + metadata into a Lora + """ + loras = {} + + get_name = lambda k: k.split(":")[0] + + keys = list(tensor_dict.keys()) + keys.sort(key=get_name) + + for name, module_keys in groupby(keys, get_name): + info = metadata.get(name) + + if not info: + raise ValueError( + f"Tensor {name} has no metadata - is this a Lora safetensor?" + ) + + # Skip Textual Inversion embeds + if info == EMBED_FLAG: + continue + + # Handle Loras + # Extract the targets + target = json.loads(info) + + # Build the result lists - Python needs us to preallocate lists to insert into them + module_keys = list(module_keys) + ranks = [4] * (len(module_keys) // 2) + weights = [None] * len(module_keys) + + for key in module_keys: + # Split the model name and index out of the key + _, idx, direction = key.split(":") + idx = int(idx) + + # Add the rank + ranks[idx] = int(metadata[f"{name}:{idx}:rank"]) + + # Insert the weight into the list + idx = idx * 2 + (1 if direction == "down" else 0) + weights[idx] = nn.parameter.Parameter(tensor_dict[key]) + + loras[name] = (weights, ranks, target) + + return loras + + def parse_safeloras_embeds( safeloras, ) -> Dict[str, torch.Tensor]: From 2a473a07f8389aa82f0f020ed71a01cef654ae94 Mon Sep 17 00:00:00 2001 From: xander Date: Sat, 25 Feb 2023 23:17:07 -0800 Subject: [PATCH 13/13] allow caching loaded dataset imgs --- lora_diffusion/dataset.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py index 2306b72..f191c12 100644 --- a/lora_diffusion/dataset.py +++ b/lora_diffusion/dataset.py @@ -345,6 +345,13 @@ def __init__( ] ) + self.instance_images = [] + + if len(self.instance_images_path) < 20: + # Load all the images into memory: + for f in self.instance_images_path: + self.instance_images.append(Image.open(f).convert("RGB")) + print("Captions:") print(self.captions) @@ -358,11 +365,14 @@ def __len__(self): def __getitem__(self, index): example = {} - instance_image = Image.open( - self.instance_images_path[index % self.num_instance_images] - ) - if not instance_image.mode == "RGB": - instance_image = instance_image.convert("RGB") + + if len(self.instance_images) > 0: + instance_image = self.instance_images[index % self.num_instance_images] + else: + instance_image = Image.open( + self.instance_images_path[index % self.num_instance_images] + ).convert("RGB") + example["instance_images"] = self.image_transforms(instance_image) if self.train_inpainting: