From dc167e57b0d8264ee2dfe58a9e94ec7df7f3d220 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 11:09:19 -0800
Subject: [PATCH 01/13] bugfix continue_ti inversion

---
 lora_diffusion/cli_lora_pti.py | 36 +++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 20d30dd..9ba1e76 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -67,6 +67,29 @@ def preview_training_batch(train_dataloader, mode, n_imgs = 40):
             print(f"\nSaved {imgs_saved} preview training imgs to {outdir}")
             return
 
+def sim_matrix(a, b, eps=1e-8):
+    """
+    added eps for numerical stability
+    """
+    b_n = b.norm(dim=1)[:, None]
+    a_norm = a / torch.max(b_n, eps * torch.ones_like(b_n))
+    b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
+    sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
+    return sim_mt
+
+def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder):
+    # get all the token embeddings:
+    token_embeds = text_encoder.get_input_embeddings().weight.data
+
+    # Compute the cosine-similarity between the optimized tokens and all the other tokens
+    similarity = sim_matrix(optimized_tokens, token_embeds).squeeze()
+    similarity = similarity.cpu().numpy()
+
+    # print similarity for the most similar tokens:
+    most_similar_tokens = np.argsort(similarity)[::-1]
+    for token_id in most_similar_tokens[:5]:
+        print(f"{tokenizer.decode(token_id)}: {similarity[token_id]:.4f}")
+
 
 def get_models(
     pretrained_model_name_or_path,
@@ -517,8 +540,11 @@ def train_inversion(
                             index_no_updates
                         ] = orig_embeds_params[index_no_updates]
                         
-                        for i, t in enumerate(optimizing_embeds):
-                            print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}")
+                        if global_step % 10 == 0:
+                            print("----------------------")
+                            for i, t in enumerate(optimizing_embeds):
+                                print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}")
+                                print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder)
 
                 global_step += 1
                 progress_bar.update(1)
@@ -654,7 +680,7 @@ def perform_tuning(
                 vae,
                 text_encoder,
                 scheduler,
-                optimized_embeddings = text_encoder.get_input_embeddings().weight[:, :],
+                optimized_embeddings = text_encoder.get_input_embeddings().weight[~index_no_updates, :], 
                 train_inpainting=train_inpainting,
                 t_mutliplier=0.8,
                 mixed_precision=True,
@@ -825,6 +851,10 @@ def train(
     script_start_time = time.time()
     torch.manual_seed(seed)
 
+    if use_template == "person" and not use_face_segmentation_condition:
+        print("###  WARNING  ### : Using person template without face segmentation condition")
+        print("When training people, it is highly recommended to use face segmentation condition!!")
+
     # Get a dict with all the arguments:
     args_dict = locals()
 

From aab969c45fa45c42eac8ed22b937c0e301e15db4 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 11:16:27 -0800
Subject: [PATCH 02/13] print most similar tokens during ti phase

---
 lora_diffusion/cli_lora_pti.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 9ba1e76..bc402fa 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -71,8 +71,8 @@ def sim_matrix(a, b, eps=1e-8):
     """
     added eps for numerical stability
     """
-    b_n = b.norm(dim=1)[:, None]
-    a_norm = a / torch.max(b_n, eps * torch.ones_like(b_n))
+    a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
+    a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
     b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
     sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
     return sim_mt

From f54cf6c1f082f053b40305da8d7b59afec66e6dd Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 11:41:14 -0800
Subject: [PATCH 03/13] print most similar tokens during ti phase

---
 lora_diffusion/cli_lora_pti.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index bc402fa..77b062c 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -77,7 +77,18 @@ def sim_matrix(a, b, eps=1e-8):
     sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
     return sim_mt
 
-def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder):
+def compute_pairwise_distances(x, y):
+    # compute the L2 distance of each row in x to each row in y (both are torch tensors)
+    n = x.size(0)
+    m = y.size(0)
+    assert x.size(1) == y.size(1)
+
+    x = x.unsqueeze(1).expand(n, m, x.size(1))
+    y = y.unsqueeze(0).expand(n, m, x.size(1))
+
+    return torch.pow(x - y, 2).sum(2)
+
+def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder, n=5):
     # get all the token embeddings:
     token_embeds = text_encoder.get_input_embeddings().weight.data
 
@@ -85,10 +96,18 @@ def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder):
     similarity = sim_matrix(optimized_tokens, token_embeds).squeeze()
     similarity = similarity.cpu().numpy()
 
+    distances = compute_pairwise_distances(optimized_tokens, token_embeds).squeeze()
+    distances = distances.cpu().numpy()
+
     # print similarity for the most similar tokens:
     most_similar_tokens = np.argsort(similarity)[::-1]
-    for token_id in most_similar_tokens[:5]:
-        print(f"{tokenizer.decode(token_id)}: {similarity[token_id]:.4f}")
+    # print embedding of most similar token:
+    embd = token_embeds[most_similar_tokens[0]]
+    
+    #print(f"Embedding of token: {embd[:].cpu().detach().numpy()}")
+    print(f"--- Most similar tokens to {tokenizer.decode(most_similar_tokens[0])}:")
+    for token_id in most_similar_tokens[1:n+1]:
+        print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"")
 
 
 def get_models(
@@ -540,8 +559,8 @@ def train_inversion(
                             index_no_updates
                         ] = orig_embeds_params[index_no_updates]
                         
-                        if global_step % 10 == 0:
-                            print("----------------------")
+                        if global_step % 20 == 0:
+                            print("------------------------------")
                             for i, t in enumerate(optimizing_embeds):
                                 print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}")
                                 print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder)

From 8f2c7b8c8493dff79f2fcc717df140fded5dfe12 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 11:55:52 -0800
Subject: [PATCH 04/13] cleanup prints

---
 lora_diffusion/cli_lora_pti.py | 61 +++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 77b062c..6432d7f 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -77,37 +77,41 @@ def sim_matrix(a, b, eps=1e-8):
     sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
     return sim_mt
 
-def compute_pairwise_distances(x, y):
+
+def compute_pairwise_distances(x,y):
     # compute the L2 distance of each row in x to each row in y (both are torch tensors)
-    n = x.size(0)
-    m = y.size(0)
-    assert x.size(1) == y.size(1)
+    # x is a torch tensor of shape (m, d)
+    # y is a torch tensor of shape (n, d)
+    # returns a torch tensor of shape (m, n)
+
+    n = y.shape[0]
+    m = x.shape[0]
+    d = x.shape[1]
 
-    x = x.unsqueeze(1).expand(n, m, x.size(1))
-    y = y.unsqueeze(0).expand(n, m, x.size(1))
+    x = x.unsqueeze(1).expand(m, n, d)
+    y = y.unsqueeze(0).expand(m, n, d)
 
     return torch.pow(x - y, 2).sum(2)
 
-def print_most_similar_tokens(tokenizer, optimized_tokens, text_encoder, n=5):
-    # get all the token embeddings:
-    token_embeds = text_encoder.get_input_embeddings().weight.data
 
-    # Compute the cosine-similarity between the optimized tokens and all the other tokens
-    similarity = sim_matrix(optimized_tokens, token_embeds).squeeze()
-    similarity = similarity.cpu().numpy()
+def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10):
+    with torch.no_grad():
+        # get all the token embeddings:
+        token_embeds = text_encoder.get_input_embeddings().weight.data
+
+        # Compute the cosine-similarity between the optimized tokens and all the other tokens
+        similarity = sim_matrix(optimized_token.unsqueeze(0), token_embeds).squeeze()
+        similarity = similarity.detach().cpu().numpy()
+
+        distances = compute_pairwise_distances(optimized_token.unsqueeze(0), token_embeds).squeeze()
+        distances = distances.detach().cpu().numpy()
 
-    distances = compute_pairwise_distances(optimized_tokens, token_embeds).squeeze()
-    distances = distances.cpu().numpy()
+        # print similarity for the most similar tokens:
+        most_similar_tokens = np.argsort(similarity)[::-1]
 
-    # print similarity for the most similar tokens:
-    most_similar_tokens = np.argsort(similarity)[::-1]
-    # print embedding of most similar token:
-    embd = token_embeds[most_similar_tokens[0]]
-    
-    #print(f"Embedding of token: {embd[:].cpu().detach().numpy()}")
-    print(f"--- Most similar tokens to {tokenizer.decode(most_similar_tokens[0])}:")
-    for token_id in most_similar_tokens[1:n+1]:
-        print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"")
+        print(f"{tokenizer.decode(most_similar_tokens[0])} --> mean: {optimized_token.mean().item():.3f}, std: {optimized_token.std().item():.3f}, norm: {optimized_token.norm():.4f}")
+        for token_id in most_similar_tokens[1:n+1]:
+            print(f"sim of {similarity[token_id]:.3f} & L2 of {distances[token_id]:.3f} with \"{tokenizer.decode(token_id)}\"")
 
 
 def get_models(
@@ -559,11 +563,10 @@ def train_inversion(
                             index_no_updates
                         ] = orig_embeds_params[index_no_updates]
                         
-                        if global_step % 20 == 0:
+                        if global_step % 50 == 0:
                             print("------------------------------")
                             for i, t in enumerate(optimizing_embeds):
-                                print(f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}")
-                                print_most_similar_tokens(tokenizer, t.unsqueeze(0), text_encoder)
+                                print_most_similar_tokens(tokenizer, t, text_encoder)
 
                 global_step += 1
                 progress_bar.update(1)
@@ -728,6 +731,12 @@ def perform_tuning(
                         index_no_updates
                     ] = orig_embeds_params[index_no_updates]
 
+            if global_step % 100 == 0:
+                optimizing_embeds = text_encoder.get_input_embeddings().weight[~index_no_updates]
+                print("------------------------------")
+                for i, t in enumerate(optimizing_embeds):
+                    print_most_similar_tokens(tokenizer, t, text_encoder)
+
 
             global_step += 1
 

From 4f5516118dad3279832cfbad3ca673bfbcd7e39d Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 12:03:28 -0800
Subject: [PATCH 05/13] cleanup prints

---
 lora_diffusion/cli_lora_pti.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 6432d7f..c86054c 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -523,12 +523,13 @@ def train_inversion(
 
                 if global_step % accum_iter == 0:
                     # print gradient of text encoder embedding
-                    print(
-                        text_encoder.get_input_embeddings()
-                        .weight.grad[index_updates, :]
-                        .norm(dim=-1)
-                        .mean()
-                    )
+                    if 0:
+                        print(
+                            text_encoder.get_input_embeddings()
+                            .weight.grad[index_updates, :]
+                            .norm(dim=-1)
+                            .mean()
+                        )
                     optimizer.step()
                     optimizer.zero_grad()
 

From edeef64b1dd210ee34b39aee867bcadeda809088 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 14 Feb 2023 18:52:28 -0800
Subject: [PATCH 06/13] minor changes

---
 lora_diffusion/cli_lora_pti.py | 3 +--
 lora_diffusion/dataset.py      | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index c86054c..5f90718 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -632,7 +632,7 @@ def train_inversion(
                 return
 
 import matplotlib.pyplot as plt
-def plot_loss_curve(losses, name, moving_avg=20):
+def plot_loss_curve(losses, name, moving_avg=5):
     losses = np.array(losses)
     losses = np.convolve(losses, np.ones(moving_avg)/moving_avg, mode='valid')
     plt.plot(losses)
@@ -1097,7 +1097,6 @@ def train(
         [el.numel() for el in itertools.chain(*unet_lora_params)]
     )
     print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params)
-
     print(f"PTI : has {len(unet_lora_params)} lora")
     print("PTI : Before training:")
     inspect_lora(unet)
diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py
index e51c301..2f7abac 100644
--- a/lora_diffusion/dataset.py
+++ b/lora_diffusion/dataset.py
@@ -44,6 +44,7 @@
     "{}",
     "a picture of {}",
     "a closeup of {}",
+    "a closeup of {}'s face",
     "a closeup photo of {}",
     "a close-up picture of {}",
     "a photo of {}",
@@ -60,6 +61,7 @@
     "{} is having fun, 4k photograph",
     "{} wearing a plaidered shirt standing next to another person",
     "smiling {} in a hoodie and sweater",
+    "{} smiling at the camera",
     "a photo of the cool {}",
     "a close-up photo of {}",
     "a bright photo of {}",

From 82264dea170bb1aeb22cbce15efbeeb422a7b6da Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Wed, 15 Feb 2023 14:42:15 -0800
Subject: [PATCH 07/13] big bugfix + allow for loading pretrained textual
 inversion embeddings

---
 lora_diffusion/cli_lora_pti.py | 80 +++++++++++++++++++++++-----------
 lora_diffusion/dataset.py      |  6 +--
 lora_diffusion/lora.py         | 11 +++--
 3 files changed, 63 insertions(+), 34 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 5f90718..e748c9e 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -46,6 +46,8 @@
     prepare_clip_model_sets,
     evaluate_pipe,
     UNET_EXTENDED_TARGET_REPLACE,
+    parse_safeloras_embeds,
+    apply_learned_embed_in_clip,
 )
 
 def preview_training_batch(train_dataloader, mode, n_imgs = 40):
@@ -586,7 +588,7 @@ def train_inversion(
                     placeholder_token_ids=placeholder_token_ids,
                     placeholder_tokens=placeholder_tokens,
                     save_path=os.path.join(
-                        save_path, f"step_inv_{global_step}.safetensors"
+                        save_path, f"step_inv_{global_step:04d}.safetensors"
                     ),
                     save_lora=False,
                 )
@@ -751,7 +753,7 @@ def perform_tuning(
                     placeholder_token_ids=placeholder_token_ids,
                     placeholder_tokens=placeholder_tokens,
                     save_path=os.path.join(
-                        save_path, f"step_{global_step}.safetensors"
+                        save_path, f"step_{global_step:04d}.safetensors"
                     ),
                     target_replace_module_text=lora_clip_target_modules,
                     target_replace_module_unet=lora_unet_target_modules,
@@ -761,8 +763,8 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-
                 print("LORA Unet Moved", moved)
+
                 moved = (
                     torch.tensor(
                         list(itertools.chain(*inspect_lora(text_encoder).values()))
@@ -770,7 +772,6 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-
                 print("LORA CLIP Moved", moved)
 
                 if log_wandb:
@@ -833,6 +834,7 @@ def train(
     placeholder_tokens: str = "",
     placeholder_token_at_data: Optional[str] = None,
     initializer_tokens: Optional[str] = None,
+    load_pretrained_inversion_embeddings_path: Optional[str] = None,
     seed: int = 42,
     resolution: int = 512,
     color_jitter: bool = True,
@@ -880,6 +882,9 @@ def train(
     script_start_time = time.time()
     torch.manual_seed(seed)
 
+    lora_rank_unet = lora_rank
+    lora_rank_text_encoder = lora_rank
+
     if use_template == "person" and not use_face_segmentation_condition:
         print("###  WARNING  ### : Using person template without face segmentation condition")
         print("When training people, it is highly recommended to use face segmentation condition!!")
@@ -900,7 +905,7 @@ def train(
 
     if output_dir is not None:
         os.makedirs(output_dir, exist_ok=True)
-    # print(placeholder_tokens, initializer_tokens)
+
     if len(placeholder_tokens) == 0:
         placeholder_tokens = []
         print("PTI : Placeholder Tokens not given, using null token")
@@ -933,6 +938,7 @@ def train(
 
     print("PTI : Placeholder Tokens", placeholder_tokens)
     print("PTI : Initializer Tokens", initializer_tokens)
+    print("PTI : Token Map: ", token_map)
 
     # get the models
     text_encoder, vae, unet, tokenizer, placeholder_token_ids = get_models(
@@ -984,8 +990,6 @@ def train(
         train_inpainting=train_inpainting,
     )
 
-    train_dataset.blur_amount = 200
-
     if train_inpainting:
         assert not cached_latents, "Cached latents not supported for inpainting"
 
@@ -1022,7 +1026,7 @@ def train(
         vae = None
 
     # STEP 1 : Perform Inversion
-    if perform_inversion and not cached_latents:
+    if perform_inversion and not cached_latents and (load_pretrained_inversion_embeddings_path is None):
         preview_training_batch(train_dataloader, "inversion")
 
         print("PTI : Performing Inversion")
@@ -1073,16 +1077,32 @@ def train(
         del ti_optimizer
         print("###############  Inversion Done  ###############")
 
+    elif load_pretrained_inversion_embeddings_path is not None:
+
+        print("PTI : Loading pretrained inversion embeddings..")
+        from safetensors.torch import safe_open
+        # Load the pretrained embeddings from the lora file:
+        safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu")
+        #monkeypatch_or_replace_safeloras(pipe, safeloras)
+        tok_dict = parse_safeloras_embeds(safeloras)
+        apply_learned_embed_in_clip(
+                tok_dict,
+                text_encoder,
+                tokenizer,
+                idempotent=True,
+            )
+
     # Next perform Tuning with LoRA:
     if not use_extended_lora:
         unet_lora_params, _ = inject_trainable_lora(
             unet,
-            r=lora_rank,
+            r=lora_rank_unet,
             target_replace_module=lora_unet_target_modules,
             dropout_p=lora_dropout_p,
             scale=lora_scale,
         )
         print("PTI : not use_extended_lora...")
+        print("PTI : Will replace modules: ", lora_unet_target_modules)
     else:
         print("PTI : USING EXTENDED UNET!!!")
         lora_unet_target_modules = (
@@ -1090,16 +1110,11 @@ def train(
         )
         print("PTI : Will replace modules: ", lora_unet_target_modules)
         unet_lora_params, _ = inject_trainable_lora_extended(
-            unet, r=lora_rank, target_replace_module=lora_unet_target_modules
+            unet, r=lora_rank_unet, target_replace_module=lora_unet_target_modules
         )
 
-    n_optimizable_unet_params = sum(
-        [el.numel() for el in itertools.chain(*unet_lora_params)]
-    )
-    print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params)
-    print(f"PTI : has {len(unet_lora_params)} lora")
-    print("PTI : Before training:")
-    inspect_lora(unet)
+    #n_optimizable_unet_params = sum([el.numel() for el in itertools.chain(*unet_lora_params)])
+    #print("PTI : Number of optimizable UNET parameters: ", n_optimizable_unet_params)
 
     params_to_optimize = [
         {"params": itertools.chain(*unet_lora_params), "lr": unet_lr},
@@ -1131,15 +1146,15 @@ def train(
         text_encoder_lora_params, _ = inject_trainable_lora(
             text_encoder,
             target_replace_module=lora_clip_target_modules,
-            r=lora_rank,
+            r=lora_rank_text_encoder,
         )
         params_to_optimize += [
-            {
-                "params": itertools.chain(*text_encoder_lora_params),
-                "lr": text_encoder_lr,
-            }
+            {"params": itertools.chain(*text_encoder_lora_params),
+                "lr": text_encoder_lr}
         ]
-        inspect_lora(text_encoder)
+
+        #n_optimizable_text_Encoder_params = sum( [el.numel() for el in itertools.chain(*text_encoder_lora_params)])
+        #print("PTI : Number of optimizable text-encoder parameters: ", n_optimizable_text_Encoder_params)
 
     lora_optimizers = optim.AdamW(params_to_optimize, weight_decay=weight_decay_lora)
 
@@ -1148,8 +1163,6 @@ def train(
         print("Training text encoder!")
         text_encoder.train()
 
-    train_dataset.blur_amount = 70
-
     lr_scheduler_lora = get_scheduler(
         lr_scheduler_lora,
         optimizer=lora_optimizers,
@@ -1159,6 +1172,22 @@ def train(
     if not cached_latents: 
         preview_training_batch(train_dataloader, "tuning")
 
+    #print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params)
+    print(f"PTI : has {len(unet_lora_params)} lora")
+    print("PTI : Before training:")
+
+    moved = (
+        torch.tensor(list(itertools.chain(*inspect_lora(unet).values())))
+        .mean().item())
+    print(f"LORA Unet Moved {moved:.6f}")
+
+
+    moved = (
+        torch.tensor(
+            list(itertools.chain(*inspect_lora(text_encoder).values()))
+        ).mean().item())
+    print(f"LORA CLIP Moved {moved:.6f}")
+
     perform_tuning(
         unet,
         vae,
@@ -1190,6 +1219,7 @@ def train(
     training_time = time.time() - script_start_time
     print(f"Training time: {training_time/60:.1f} minutes")
     args_dict["training_time_s"] = int(training_time)
+    args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader))
 
     # Save the args_dict to the output directory as a json file:
     with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f:
diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py
index 2f7abac..f566c83 100644
--- a/lora_diffusion/dataset.py
+++ b/lora_diffusion/dataset.py
@@ -207,8 +207,7 @@ def __init__(
         resize=True,
         use_mask_captioned_data=False,
         use_face_segmentation_condition=False,
-        train_inpainting=False,
-        blur_amount: int = 70,
+        train_inpainting=False
     ):
         self.size = size
         self.tokenizer = tokenizer
@@ -341,8 +340,6 @@ def __init__(
             ]
         )
 
-        self.blur_amount = blur_amount
-
         print("Captions:")
         print(self.captions)
 
@@ -350,7 +347,6 @@ def tune_h_flip_prob(self, training_progress):
         if self.h_flip:
             # Tune the h_flip probability to be 0.5 training_progress is 0 and end_prob when training_progress is 1
             self.h_flip_prob = 0.5 + (self.final_flip_prob - 0.5) * training_progress
-            print(f"h_flip_prob: {self.h_flip_prob:.3f}")
 
     def __len__(self):
         return self._length
diff --git a/lora_diffusion/lora.py b/lora_diffusion/lora.py
index bc3c5d1..52bc829 100644
--- a/lora_diffusion/lora.py
+++ b/lora_diffusion/lora.py
@@ -4,6 +4,7 @@
 from typing import Callable, Dict, List, Optional, Set, Tuple, Type, Union
 
 import numpy as np
+import random
 import PIL
 import torch
 import torch.nn as nn
@@ -801,7 +802,7 @@ def monkeypatch_or_replace_safeloras(models, safeloras):
 
     for name, (lora, ranks, target) in loras.items():
         model = getattr(models, name, None)
-
+        
         if not model:
             print(f"No model provided for {name}, contained in Lora")
             continue
@@ -1028,17 +1029,19 @@ def inspect_lora(model):
 
     for name, _module in model.named_modules():
         if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]:
+            # get the up and down weight matrices:
             ups = _module.lora_up.weight.data.clone()
             downs = _module.lora_down.weight.data.clone()
-
+            
+            # flatten and compute dot product:
             wght: torch.Tensor = ups.flatten(1) @ downs.flatten(1)
-
+            # get the mean of the absolute value of the dot product:
             dist = wght.flatten().abs().mean().item()
+
             if name in moved:
                 moved[name].append(dist)
             else:
                 moved[name] = [dist]
-
     return moved
 
 

From f7471d40c0a3ea407f318fe6c66f51c4a54b4e90 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Thu, 16 Feb 2023 21:07:58 -0800
Subject: [PATCH 08/13] add grid_search script

---
 grid_train_lora.py             | 130 +++++++++++++++++++++++++++++++++
 lora_diffusion/cli_lora_pti.py |  63 +++++++++++++---
 run_segment.py                 |  85 +++++++++++++++++++++
 3 files changed, 266 insertions(+), 12 deletions(-)
 create mode 100644 grid_train_lora.py
 create mode 100644 run_segment.py

diff --git a/grid_train_lora.py b/grid_train_lora.py
new file mode 100644
index 0000000..1a0a6f5
--- /dev/null
+++ b/grid_train_lora.py
@@ -0,0 +1,130 @@
+import itertools
+import os
+import random
+import time
+
+def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None):
+    """Randomly sample up to `n` distinct settings from `param_grid` and launch one training run each.
+
+    Scalar (int/str/bool/float) entries are passed unchanged to every run; each list entry is
+    sampled with random.choice. `param_grid` must contain 'instance_data_dir'. Each setting is
+    printed and, unless `test` is truthy, run via os.system with output under ./exps/<dirname>/.
+    `seed` seeds `random`; when it is None the current time is used.
+    """
+    random.seed(seed if seed is not None else int(time.time()))
+
+    # Split the parameter grid into fixed and variable arguments
+    fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))}
+    variable_keys = [k for k in param_grid if k not in fixed_args]
+
+    # Draw a large pool of candidate settings, sampling each list-valued argument independently.
+    # Non-list leftovers (e.g. None) are passed through as-is instead of being iterated.
+    long_grid_values = []
+    for _ in range(10000):
+        values = {}
+        for k in variable_keys:
+            if isinstance(param_grid[k], list):
+                values[k] = random.choice(param_grid[k])
+            else:
+                values[k] = param_grid[k]
+        long_grid_values.append(values)
+
+    # Randomly sample a subset of the pool; clamp so n > pool size cannot raise ValueError
+    grid_values = random.sample(long_grid_values, min(n, len(long_grid_values)))
+
+    # Combine fixed and variable arguments into a single dictionary
+    grid_values = [{**fixed_args, **values} for values in grid_values]
+
+    # Define the command to execute your Python job with input arguments
+    cmd = 'python lora_diffusion/cli_lora_pti.py'
+
+    # shuffle the grid values ordering:
+    random.shuffle(grid_values)
+    already_done = []
+
+    # Loop over the grid values and execute the Python job with each combination of input arguments
+    for i, values in enumerate(grid_values[:n]):
+        if values in already_done: #This combo has already been tried, skip..
+            continue
+        already_done.append(values.copy())
+
+        # get the datadirectory name:
+        data_dir = "_".join(values['instance_data_dir'].split('/')[-2:])
+
+        # generate a short, pseudorandom character id for this run:
+        id_str = ''.join(random.choice('0123456789abcdef') for _ in range(6))
+        values['output_dir'] = f"./exps/{dirname}/{data_dir}_{i:02d}_{id_str}"
+
+        # NOTE(review): values are interpolated unquoted, so anything needing shell quoting must carry it
+        arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()])
+        full_cmd = f'{cmd} {arg_str}'
+        print('------------------------------------------')
+        print(f'Running command: {i+1}/{n}')
+
+        # pretty print the values dictionary:
+        for k, v in values.items():
+            print(f'{k}:{" "*(50-len(k))}{v}')
+
+        if not test:
+            os.system(full_cmd)
+
+
+"""
+
+export CUDA_VISIBLE_DEVICES=3
+conda activate diffusers
+cd /home/xander/Projects/cog/lora
+python grid_train_lora.py
+
+
+'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train",
+'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train_one",
+'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/niko/train",
+'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train",
+
+
+"""
+
+n_to_run = 50
+# Values given as lists are sampled per run by run_lora_experiment; scalar values are passed unchanged to every run.
+param_grid = {
+  'pretrained_model_name_or_path': ['dreamlike-art/dreamlike-photoreal-2.0'],
+  'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/ready/xander/train",
+
+  'train_text_encoder':            True,
+  'perform_inversion':             True,
+  'learning_rate_ti':              [1e-4, 2.5e-4],
+  'continue_inversion':            True,
+  'continue_inversion_lr':         [0.5e-5, 2e-5, 1e-4],
+  'learning_rate_unet':            [1.0e-5, 2.5e-5],
+  'learning_rate_text':            [1.0e-5, 2.5e-5],
+  'save_steps':                    50,
+  'max_train_steps_ti':            [200, 300, 400], 
+  'max_train_steps_tuning':        [300, 450, 600], 
+  'weight_decay_ti':               [0.001, 0.005],
+  'weight_decay_lora':             [0.0001, 0.001],
+  'lora_rank_unet':                [1,2,4],
+  'lora_rank_text_encoder':        [1,4,8,16],
+  'use_extended_lora':             [False, True],
+
+  'use_face_segmentation_condition': True,
+  'use_mask_captioned_data':       False,
+  'placeholder_tokens':            "\"<person1>|<person2>\"",
+  'proxy_token':                   "person",
+  'use_template':                  "person",
+  'clip_ti_decay':                 True,
+
+  'cached_latents':                False,
+  'train_batch_size':              4,
+  'gradient_accumulation_steps':   1,
+  'color_jitter':                  True,
+  'scale_lr':                      True,
+  'lr_scheduler':                  "linear",
+  'lr_warmup_steps':               0,
+
+  'resolution':                    512,
+  'enable_xformers_memory_efficient_attention': True,
+
+}
+
+run_lora_experiment(param_grid, n=n_to_run)
\ No newline at end of file
diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index e748c9e..0f8cf75 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -96,7 +96,7 @@ def compute_pairwise_distances(x,y):
     return torch.pow(x - y, 2).sum(2)
 
 
-def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10):
+def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=5):
     with torch.no_grad():
         # get all the token embeddings:
         token_embeds = text_encoder.get_input_embeddings().weight.data
@@ -334,6 +334,24 @@ def collate_fn(examples):
 
     return train_dataloader
 
+def get_lora_norm(model):
+    norm, n_elements = 0, 0
+
+    for name, _module in model.named_modules():
+        if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]:
+            # get the up and down weight matrices:
+            ups = _module.lora_up.weight.data
+            downs = _module.lora_down.weight.data
+            
+            # flatten and compute dot product:
+            wght = (ups.flatten(1) @ downs.flatten(1)).flatten()
+
+            # add to the total norm:
+            norm += wght.abs().sum()
+            n_elements += wght.shape[0]
+
+    return norm / n_elements
+
 
 def loss_step(
     batch,
@@ -452,6 +470,15 @@ def loss_step(
         embedding_norm_loss = (embedding_norm - target_norm)**2
         loss += 0.005*embedding_norm_loss
 
+    if 0: #disable norm regularization for now
+        unet_norm = get_lora_norm(unet)
+        text_encoder_norm = get_lora_norm(text_encoder)
+        print(f"text_encoder norm: {text_encoder_norm.item():.6f}, unet_norm: {unet_norm.item():.6f}", )
+        norm_loss = 0.5 * unet_norm + 0.5 * text_encoder_norm
+        norm_loss_f = 0.0
+        print(f"loss: {loss.item():.6f}, norm_loss: {norm_loss.item():.6f}, norm_loss_f = {norm_loss_f:.1f}")
+        loss += norm_loss_f * norm_loss
+
     return loss
 
 
@@ -763,7 +790,7 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-                print("LORA Unet Moved", moved)
+                print(f"LORA Unet Moved {moved:.6f}")
 
                 moved = (
                     torch.tensor(
@@ -772,7 +799,7 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-                print("LORA CLIP Moved", moved)
+                print(f"LORA CLIP Moved {moved:.6f}")
 
                 if log_wandb:
                     with torch.no_grad():
@@ -845,7 +872,8 @@ def train(
     save_steps: int = 100,
     gradient_accumulation_steps: int = 4,
     gradient_checkpointing: bool = False,
-    lora_rank: int = 4,
+    lora_rank_unet: int = 4,
+    lora_rank_text_encoder: int = 4,
     lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"},
     lora_clip_target_modules={"CLIPAttention"},
     lora_dropout_p: float = 0.0,
@@ -882,9 +910,6 @@ def train(
     script_start_time = time.time()
     torch.manual_seed(seed)
 
-    lora_rank_unet = lora_rank
-    lora_rank_text_encoder = lora_rank
-
     if use_template == "person" and not use_face_segmentation_condition:
         print("###  WARNING  ### : Using person template without face segmentation condition")
         print("When training people, it is highly recommended to use face segmentation condition!!")
@@ -1079,7 +1104,7 @@ def train(
 
     elif load_pretrained_inversion_embeddings_path is not None:
 
-        print("PTI : Loading pretrained inversion embeddings..")
+        print(f"PTI : Loading pretrained inversion embeddings from {load_pretrained_inversion_embeddings_path}...")
         from safetensors.torch import safe_open
         # Load the pretrained embeddings from the lora file:
         safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu")
@@ -1176,17 +1201,17 @@ def train(
     print(f"PTI : has {len(unet_lora_params)} lora")
     print("PTI : Before training:")
 
-    moved = (
+    unet_moved = (
         torch.tensor(list(itertools.chain(*inspect_lora(unet).values())))
         .mean().item())
-    print(f"LORA Unet Moved {moved:.6f}")
+    print(f"LORA Unet Moved {unet_moved:.6f}")
 
 
-    moved = (
+    clip_moved = (
         torch.tensor(
             list(itertools.chain(*inspect_lora(text_encoder).values()))
         ).mean().item())
-    print(f"LORA CLIP Moved {moved:.6f}")
+    print(f"LORA CLIP Moved {clip_moved:.6f}")
 
     perform_tuning(
         unet,
@@ -1215,11 +1240,25 @@ def train(
         train_inpainting=train_inpainting,
     )
 
+    unet_moved = (
+        torch.tensor(list(itertools.chain(*inspect_lora(unet).values())))
+        .mean().item())
+    print(f"LORA Unet Moved {unet_moved:.6f}")
+
+    clip_moved = (
+        torch.tensor(
+            list(itertools.chain(*inspect_lora(text_encoder).values()))
+        ).mean().item())
+    print(f"LORA CLIP Moved {clip_moved:.6f}")
+
+
     print("###############  Tuning Done  ###############")
     training_time = time.time() - script_start_time
     print(f"Training time: {training_time/60:.1f} minutes")
     args_dict["training_time_s"] = int(training_time)
     args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader))
+    args_dict["unet_moved"] = unet_moved
+    args_dict["clip_moved"] = clip_moved
 
     # Save the args_dict to the output directory as a json file:
     with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f:
diff --git a/run_segment.py b/run_segment.py
new file mode 100644
index 0000000..4323cdc
--- /dev/null
+++ b/run_segment.py
@@ -0,0 +1,85 @@
+import itertools
+import os
+import random
+import time
+
+def run_lora_experiment(param_grid, cmd, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None):
+  if seed is not None:
+      random.seed(seed)
+  else:
+      random.seed(int(time.time()))
+
+  # Split the parameter grid into fixed and variable arguments
+  fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))}
+  variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args}
+
+  # Generate all combinations of variable arguments
+  variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()]))
+  variable_keys = list(variable_args.keys())
+
+  # Generate a long list of grid_values by randomly sampling each argument list
+  long_grid_values = []
+  for i in range(10000):
+      values = {}
+      for k in variable_keys:
+          if isinstance(param_grid[k], list):
+              values[k] = random.choice(param_grid[k])
+          else:
+              values[k] = param_grid[k]
+      long_grid_values.append(values)
+  
+  # Randomly sample a subset of the long list of grid_values
+  grid_values = random.sample(long_grid_values, n)
+
+  # Combine fixed and variable arguments into a single dictionary
+  grid_values = [{**fixed_args, **values} for values in grid_values]
+
+  # shuffle the grid values ordering:
+  random.shuffle(grid_values)
+  already_done = []
+
+  # Loop over the grid values and execute the Python job with each combination of input arguments
+  for i, values in enumerate(grid_values[:n]):
+    if values in already_done: #This combo has already been tried, skip..
+      continue
+
+    already_done.append(values.copy())
+
+    arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()])
+    full_cmd = f'{cmd} {arg_str}'
+    print('------------------------------------------')
+    print(f'Running command: {i+1}/{n}')
+
+    # pretty print the values dictionary:
+    for k, v in values.items():
+      print(f'{k}:{" "*(50-len(k))}{v}')
+
+    if not test:
+      os.system(full_cmd)
+
+
+"""
+
+export CUDA_VISIBLE_DEVICES=2
+conda activate diffusers
+cd /home/xander/Projects/cog/lora
+python run_segment.py
+
+"""
+
+python_cmd = "python lora_diffusion/preprocess_files.py"
+input_dir = "/home/xander/Pictures/Mars2023/people/run_segment"
+
+for subdir in sorted(os.listdir(input_dir)):
+
+  full_input_dir = os.path.join(input_dir, subdir) + "/imgs"
+  output_dir = os.path.join(input_dir, subdir) + "/train"
+
+  param_grid = {
+    'files': full_input_dir,
+    'output_dir': output_dir,
+    'target_prompts': "face",
+    'target_size': 512,
+  }
+
+  run_lora_experiment(param_grid, python_cmd)
\ No newline at end of file

From c8b00f46089a1a6ca59988d7476de0678b036235 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Thu, 16 Feb 2023 21:10:54 -0800
Subject: [PATCH 09/13] clean

---
 grid_train_lora.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/grid_train_lora.py b/grid_train_lora.py
index 1a0a6f5..fbb3ed1 100644
--- a/grid_train_lora.py
+++ b/grid_train_lora.py
@@ -3,7 +3,7 @@
 import random
 import time
 
-def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None):
+def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "grid_search_results", seed = None):
   if seed is not None:
       random.seed(seed)
   else:
@@ -69,22 +69,6 @@ def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "lora_grid_searc
       os.system(full_cmd)
 
 
-"""
-
-export CUDA_VISIBLE_DEVICES=3
-conda activate diffusers
-cd /home/xander/Projects/cog/lora
-python grid_train_lora.py
-
-
-'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train",
-'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train_one",
-'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/niko/train",
-'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/gene/train",
-
-
-"""
-
 n_to_run = 50
 
 param_grid = {

From a5c3d2ecb92df819c659585974a417c38977f5d2 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Thu, 16 Feb 2023 21:14:40 -0800
Subject: [PATCH 10/13] Remove grid_train_lora.py

---
 grid_train_lora.py             | 114 ---------------------------------
 lora_diffusion/cli_lora_pti.py |  63 ++++--------------
 run_segment.py                 |  85 ------------------------
 3 files changed, 12 insertions(+), 250 deletions(-)
 delete mode 100644 grid_train_lora.py
 delete mode 100644 run_segment.py

diff --git a/grid_train_lora.py b/grid_train_lora.py
deleted file mode 100644
index fbb3ed1..0000000
--- a/grid_train_lora.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import itertools
-import os
-import random
-import time
-
-def run_lora_experiment(param_grid, n=1000, test = 0, dirname = "grid_search_results", seed = None):
-  if seed is not None:
-      random.seed(seed)
-  else:
-      random.seed(int(time.time()))
-
-  # Split the parameter grid into fixed and variable arguments
-  fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))}
-  variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args}
-
-  # Generate all combinations of variable arguments
-  variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()]))
-  variable_keys = list(variable_args.keys())
-
-  # Generate a long list of grid_values by randomly sampling each argument list
-  long_grid_values = []
-  for i in range(10000):
-      values = {}
-      for k in variable_keys:
-          if isinstance(param_grid[k], list):
-              values[k] = random.choice(param_grid[k])
-          else:
-              values[k] = param_grid[k]
-      long_grid_values.append(values)
-  
-  # Randomly sample a subset of the long list of grid_values
-  grid_values = random.sample(long_grid_values, n)
-
-  # Combine fixed and variable arguments into a single dictionary
-  grid_values = [{**fixed_args, **values} for values in grid_values]
-
-  # Define the command to execute your Python job with input arguments
-  cmd = 'python lora_diffusion/cli_lora_pti.py'
-
-  # shuffle the grid values ordering:
-  random.shuffle(grid_values)
-  already_done = []
-
-  # Loop over the grid values and execute the Python job with each combination of input arguments
-  for i, values in enumerate(grid_values[:n]):
-    if values in already_done: #This combo has already been tried, skip..
-      continue
-
-    already_done.append(values.copy())
-
-    # get the datadirectory name:
-    data_dir = "_".join(values['instance_data_dir'].split('/')[-2:])
-
-    # generate a short, pseudorandom character id for this run:
-    id_str = ''.join(random.choice('0123456789abcdef') for i in range(6))
-
-    values['output_dir'] = f"./exps/{dirname}/{data_dir}_{i:02d}_{id_str}"
-
-    arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()])
-    full_cmd = f'{cmd} {arg_str}'
-    print('------------------------------------------')
-    print(f'Running command: {i+1}/{n}')
-
-    # pretty print the values dictionary:
-    for k, v in values.items():
-      print(f'{k}:{" "*(50-len(k))}{v}')
-
-    if not test:
-      os.system(full_cmd)
-
-
-n_to_run = 50
-
-param_grid = {
-  'pretrained_model_name_or_path': ['dreamlike-art/dreamlike-photoreal-2.0'],
-  'instance_data_dir':             "/home/xander/Pictures/Mars2023/people/ready/xander/train",
-
-  'train_text_encoder':            True,
-  'perform_inversion':             True,
-  'learning_rate_ti':              [1e-4, 2.5e-4],
-  'continue_inversion':            True,
-  'continue_inversion_lr':         [0.5e-5, 2e-5, 1e-4],
-  'learning_rate_unet':            [1.0e-5, 2.5e-5],
-  'learning_rate_text':            [1.0e-5, 2.5e-5],
-  'save_steps':                    50,
-  'max_train_steps_ti':            [200, 300, 400], 
-  'max_train_steps_tuning':        [300, 450, 600], 
-  'weight_decay_ti':               [0.001, 0.005],
-  'weight_decay_lora':             [0.0001, 0.001],
-  'lora_rank_unet':                [1,2,4],
-  'lora_rank_text_encoder':        [1,4,8,16],
-  'use_extended_lora':             [False, True],
-
-  'use_face_segmentation_condition': True,
-  'use_mask_captioned_data':       False,
-  'placeholder_tokens':            "\"<person1>|<person2>\"",
-  'proxy_token':                   "person",
-  'use_template':                  "person",
-  'clip_ti_decay':                 True,
-
-  'cached_latents':                False,
-  'train_batch_size':              4,
-  'gradient_accumulation_steps':   1,
-  'color_jitter':                  True,
-  'scale_lr':                      True,
-  'lr_scheduler':                  "linear",
-  'lr_warmup_steps':               0,
-
-  'resolution':                    512,
-  'enable_xformers_memory_efficient_attention': True,
-
-}
-
-run_lora_experiment(param_grid, n=n_to_run)
\ No newline at end of file
diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 0f8cf75..e748c9e 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -96,7 +96,7 @@ def compute_pairwise_distances(x,y):
     return torch.pow(x - y, 2).sum(2)
 
 
-def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=5):
+def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10):
     with torch.no_grad():
         # get all the token embeddings:
         token_embeds = text_encoder.get_input_embeddings().weight.data
@@ -334,24 +334,6 @@ def collate_fn(examples):
 
     return train_dataloader
 
-def get_lora_norm(model):
-    norm, n_elements = 0, 0
-
-    for name, _module in model.named_modules():
-        if _module.__class__.__name__ in ["LoraInjectedLinear", "LoraInjectedConv2d"]:
-            # get the up and down weight matrices:
-            ups = _module.lora_up.weight.data
-            downs = _module.lora_down.weight.data
-            
-            # flatten and compute dot product:
-            wght = (ups.flatten(1) @ downs.flatten(1)).flatten()
-
-            # add to the total norm:
-            norm += wght.abs().sum()
-            n_elements += wght.shape[0]
-
-    return norm / n_elements
-
 
 def loss_step(
     batch,
@@ -470,15 +452,6 @@ def loss_step(
         embedding_norm_loss = (embedding_norm - target_norm)**2
         loss += 0.005*embedding_norm_loss
 
-    if 0: #disable norm regularization for now
-        unet_norm = get_lora_norm(unet)
-        text_encoder_norm = get_lora_norm(text_encoder)
-        print(f"text_encoder norm: {text_encoder_norm.item():.6f}, unet_norm: {unet_norm.item():.6f}", )
-        norm_loss = 0.5 * unet_norm + 0.5 * text_encoder_norm
-        norm_loss_f = 0.0
-        print(f"loss: {loss.item():.6f}, norm_loss: {norm_loss.item():.6f}, norm_loss_f = {norm_loss_f:.1f}")
-        loss += norm_loss_f * norm_loss
-
     return loss
 
 
@@ -790,7 +763,7 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-                print(f"LORA Unet Moved {moved:.6f}")
+                print("LORA Unet Moved", moved)
 
                 moved = (
                     torch.tensor(
@@ -799,7 +772,7 @@ def perform_tuning(
                     .mean()
                     .item()
                 )
-                print(f"LORA CLIP Moved {moved:.6f}")
+                print("LORA CLIP Moved", moved)
 
                 if log_wandb:
                     with torch.no_grad():
@@ -872,8 +845,7 @@ def train(
     save_steps: int = 100,
     gradient_accumulation_steps: int = 4,
     gradient_checkpointing: bool = False,
-    lora_rank_unet: int = 4,
-    lora_rank_text_encoder: int = 4,
+    lora_rank: int = 4,
     lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"},
     lora_clip_target_modules={"CLIPAttention"},
     lora_dropout_p: float = 0.0,
@@ -910,6 +882,9 @@ def train(
     script_start_time = time.time()
     torch.manual_seed(seed)
 
+    lora_rank_unet = lora_rank
+    lora_rank_text_encoder = lora_rank
+
     if use_template == "person" and not use_face_segmentation_condition:
         print("###  WARNING  ### : Using person template without face segmentation condition")
         print("When training people, it is highly recommended to use face segmentation condition!!")
@@ -1104,7 +1079,7 @@ def train(
 
     elif load_pretrained_inversion_embeddings_path is not None:
 
-        print(f"PTI : Loading pretrained inversion embeddings from {load_pretrained_inversion_embeddings_path}...")
+        print("PTI : Loading pretrained inversion embeddings..")
         from safetensors.torch import safe_open
         # Load the pretrained embeddings from the lora file:
         safeloras = safe_open(load_pretrained_inversion_embeddings_path, framework="pt", device="cpu")
@@ -1201,17 +1176,17 @@ def train(
     print(f"PTI : has {len(unet_lora_params)} lora")
     print("PTI : Before training:")
 
-    unet_moved = (
+    moved = (
         torch.tensor(list(itertools.chain(*inspect_lora(unet).values())))
         .mean().item())
-    print(f"LORA Unet Moved {unet_moved:.6f}")
+    print(f"LORA Unet Moved {moved:.6f}")
 
 
-    clip_moved = (
+    moved = (
         torch.tensor(
             list(itertools.chain(*inspect_lora(text_encoder).values()))
         ).mean().item())
-    print(f"LORA CLIP Moved {clip_moved:.6f}")
+    print(f"LORA CLIP Moved {moved:.6f}")
 
     perform_tuning(
         unet,
@@ -1240,25 +1215,11 @@ def train(
         train_inpainting=train_inpainting,
     )
 
-    unet_moved = (
-        torch.tensor(list(itertools.chain(*inspect_lora(unet).values())))
-        .mean().item())
-    print(f"LORA Unet Moved {unet_moved:.6f}")
-
-    clip_moved = (
-        torch.tensor(
-            list(itertools.chain(*inspect_lora(text_encoder).values()))
-        ).mean().item())
-    print(f"LORA CLIP Moved {clip_moved:.6f}")
-
-
     print("###############  Tuning Done  ###############")
     training_time = time.time() - script_start_time
     print(f"Training time: {training_time/60:.1f} minutes")
     args_dict["training_time_s"] = int(training_time)
     args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader))
-    args_dict["unet_moved"] = unet_moved
-    args_dict["clip_moved"] = clip_moved
 
     # Save the args_dict to the output directory as a json file:
     with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f:
diff --git a/run_segment.py b/run_segment.py
deleted file mode 100644
index 4323cdc..0000000
--- a/run_segment.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import itertools
-import os
-import random
-import time
-
-def run_lora_experiment(param_grid, cmd, n=1000, test = 0, dirname = "lora_grid_search_02_xander", seed = None):
-  if seed is not None:
-      random.seed(seed)
-  else:
-      random.seed(int(time.time()))
-
-  # Split the parameter grid into fixed and variable arguments
-  fixed_args = {k: v for k, v in param_grid.items() if isinstance(v, (int, str, bool, float))}
-  variable_args = {k: v for k, v in param_grid.items() if k not in fixed_args}
-
-  # Generate all combinations of variable arguments
-  variable_values = list(itertools.product(*[v if not isinstance(v, list) else [v] for v in variable_args.values()]))
-  variable_keys = list(variable_args.keys())
-
-  # Generate a long list of grid_values by randomly sampling each argument list
-  long_grid_values = []
-  for i in range(10000):
-      values = {}
-      for k in variable_keys:
-          if isinstance(param_grid[k], list):
-              values[k] = random.choice(param_grid[k])
-          else:
-              values[k] = param_grid[k]
-      long_grid_values.append(values)
-  
-  # Randomly sample a subset of the long list of grid_values
-  grid_values = random.sample(long_grid_values, n)
-
-  # Combine fixed and variable arguments into a single dictionary
-  grid_values = [{**fixed_args, **values} for values in grid_values]
-
-  # shuffle the grid values ordering:
-  random.shuffle(grid_values)
-  already_done = []
-
-  # Loop over the grid values and execute the Python job with each combination of input arguments
-  for i, values in enumerate(grid_values[:n]):
-    if values in already_done: #This combo has already been tried, skip..
-      continue
-
-    already_done.append(values.copy())
-
-    arg_str = ' '.join([f'--{k} {v}' for k, v in values.items()])
-    full_cmd = f'{cmd} {arg_str}'
-    print('------------------------------------------')
-    print(f'Running command: {i+1}/{n}')
-
-    # pretty print the values dictionary:
-    for k, v in values.items():
-      print(f'{k}:{" "*(50-len(k))}{v}')
-
-    if not test:
-      os.system(full_cmd)
-
-
-"""
-
-export CUDA_VISIBLE_DEVICES=2
-conda activate diffusers
-cd /home/xander/Projects/cog/lora
-python run_segment.py
-
-"""
-
-python_cmd = "python lora_diffusion/preprocess_files.py"
-input_dir = "/home/xander/Pictures/Mars2023/people/run_segment"
-
-for subdir in sorted(os.listdir(input_dir)):
-
-  full_input_dir = os.path.join(input_dir, subdir) + "/imgs"
-  output_dir = os.path.join(input_dir, subdir) + "/train"
-
-  param_grid = {
-    'files': full_input_dir,
-    'output_dir': output_dir,
-    'target_prompts': "face",
-    'target_size': 512,
-  }
-
-  run_lora_experiment(param_grid, python_cmd)
\ No newline at end of file

From 0f642d3814405728225d3b2239a1339dfe8e3d6a Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Tue, 21 Feb 2023 13:21:54 -0800
Subject: [PATCH 11/13] separate lora ranks for unet and text_encoder

---
 lora_diffusion/cli_lora_pti.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index e748c9e..9dbb7fd 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -845,7 +845,8 @@ def train(
     save_steps: int = 100,
     gradient_accumulation_steps: int = 4,
     gradient_checkpointing: bool = False,
-    lora_rank: int = 4,
+    lora_rank_unet: int = 4,
+    lora_rank_text_encoder: int = 4,
     lora_unet_target_modules={"CrossAttention", "Attention", "GEGLU"},
     lora_clip_target_modules={"CLIPAttention"},
     lora_dropout_p: float = 0.0,
@@ -882,9 +883,6 @@ def train(
     script_start_time = time.time()
     torch.manual_seed(seed)
 
-    lora_rank_unet = lora_rank
-    lora_rank_text_encoder = lora_rank
-
     if use_template == "person" and not use_face_segmentation_condition:
         print("###  WARNING  ### : Using person template without face segmentation condition")
         print("When training people, it is highly recommended to use face segmentation condition!!")
@@ -1219,7 +1217,8 @@ def train(
     training_time = time.time() - script_start_time
     print(f"Training time: {training_time/60:.1f} minutes")
     args_dict["training_time_s"] = int(training_time)
-    args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader))
+    args_dict["n_epochs"] = math.ceil(max_train_steps_tuning / len(train_dataloader.dataset))
+    args_dict["n_training_imgs"] = len(train_dataloader.dataset)
 
     # Save the args_dict to the output directory as a json file:
     with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f:

From d4d67403934d5ab02e7b81c08871ed9aea535fad Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Sat, 25 Feb 2023 23:05:15 -0800
Subject: [PATCH 12/13] updates

---
 lora_diffusion/cli_lora_pti.py |  9 ++++--
 lora_diffusion/dataset.py      |  5 ++++
 lora_diffusion/lora.py         | 50 +++++++++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py
index 9dbb7fd..eb02efc 100644
--- a/lora_diffusion/cli_lora_pti.py
+++ b/lora_diffusion/cli_lora_pti.py
@@ -107,7 +107,7 @@ def print_most_similar_tokens(tokenizer, optimized_token, text_encoder, n=10):
 
         distances = compute_pairwise_distances(optimized_token.unsqueeze(0), token_embeds).squeeze()
         distances = distances.detach().cpu().numpy()
-
+        
         # print similarity for the most similar tokens:
         most_similar_tokens = np.argsort(similarity)[::-1]
 
@@ -187,11 +187,13 @@ def get_models(
         pretrained_vae_name_or_path or pretrained_model_name_or_path,
         subfolder=None if pretrained_vae_name_or_path else "vae",
         revision=None if pretrained_vae_name_or_path else revision,
+        local_files_only = True,
     )
     unet = UNet2DConditionModel.from_pretrained(
         pretrained_model_name_or_path,
         subfolder="unet",
         revision=revision,
+        local_files_only = True,
     )
 
     return (
@@ -199,7 +201,7 @@ def get_models(
         vae.to(device),
         unet.to(device),
         tokenizer,
-        placeholder_token_ids,
+        placeholder_token_ids
     )
 
 
@@ -949,7 +951,8 @@ def train(
     )
 
     noise_scheduler = DDPMScheduler.from_config(
-        pretrained_model_name_or_path, subfolder="scheduler"
+        pretrained_model_name_or_path, subfolder="scheduler", 
+        local_files_only = True,
     )
 
     if gradient_checkpointing:
diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py
index f566c83..2306b72 100644
--- a/lora_diffusion/dataset.py
+++ b/lora_diffusion/dataset.py
@@ -313,6 +313,11 @@ def __init__(
             for idx in range(len(self.instance_images_path)):
                 self.mask_path.append(f"{instance_data_root}/{idx}.mask.png")
 
+        # Final important variables for this dataset:
+        # self.instance_images_path
+        # self.mask_path
+        # self.captions
+
         self.num_instance_images = len(self.instance_images_path)
         self.token_map = token_map
 
diff --git a/lora_diffusion/lora.py b/lora_diffusion/lora.py
index 52bc829..5919178 100644
--- a/lora_diffusion/lora.py
+++ b/lora_diffusion/lora.py
@@ -535,7 +535,6 @@ def convert_loras_to_safeloras(
 ):
     convert_loras_to_safeloras_with_embeds(modelmap=modelmap, outpath=outpath)
 
-
 def parse_safeloras(
     safeloras,
 ) -> Dict[str, Tuple[List[nn.parameter.Parameter], List[int], List[str]]]:
@@ -597,6 +596,55 @@ def parse_safeloras(
     return loras
 
 
+def dict_to_lora(tensor_dict, metadata):
+    """Group "name:idx:direction" tensors into {name: (weights, ranks, target)}.
+    Names whose metadata equals EMBED_FLAG are skipped; a name without metadata raises ValueError.
+    """
+    loras = {}
+    # The model name is the part of a tensor key before the first ":".
+    get_name = lambda k: k.split(":")[0]
+    # groupby only merges adjacent items, so sort with the same key function first.
+    keys = list(tensor_dict.keys())
+    keys.sort(key=get_name)
+
+    for name, module_keys in groupby(keys, get_name):
+        info = metadata.get(name)
+        # A missing or empty metadata entry for this name is treated as an error.
+        if not info:
+            raise ValueError(
+                f"Tensor {name} has no metadata - is this a Lora safetensor?"
+            )
+
+        # Skip Textual Inversion embeds
+        if info == EMBED_FLAG:
+            continue
+
+        # Any other entry is handled as a Lora whose metadata value is a JSON string.
+        # Decode it to get the target(s) recorded for this model name.
+        target = json.loads(info)
+
+        # Preallocate the result lists so entries can be assigned by index in any key order.
+        module_keys = list(module_keys)  # materialize: the groupby iterator is single-pass
+        ranks = [4] * (len(module_keys) // 2)  # placeholders, overwritten in the loop below
+        weights = [None] * len(module_keys)
+
+        for key in module_keys:
+            # Keys must have exactly three ":"-separated fields: name, index, direction.
+            _, idx, direction = key.split(":")
+            idx = int(idx)
+
+            # The rank is read from metadata["<name>:<idx>:rank"] (assigned once per key).
+            ranks[idx] = int(metadata[f"{name}:{idx}:rank"])
+
+            # Interleave per index: slot 2*idx + 1 for "down", slot 2*idx otherwise (presumably "up").
+            idx = idx * 2 + (1 if direction == "down" else 0)
+            weights[idx] = nn.parameter.Parameter(tensor_dict[key])
+
+        loras[name] = (weights, ranks, target)
+
+    return loras
+
+
 def parse_safeloras_embeds(
     safeloras,
 ) -> Dict[str, torch.Tensor]:

From 2a473a07f8389aa82f0f020ed71a01cef654ae94 Mon Sep 17 00:00:00 2001
From: xander <xandersteenbruggex@gmail.com>
Date: Sat, 25 Feb 2023 23:17:07 -0800
Subject: [PATCH 13/13] allow caching loaded dataset imgs

---
 lora_diffusion/dataset.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py
index 2306b72..f191c12 100644
--- a/lora_diffusion/dataset.py
+++ b/lora_diffusion/dataset.py
@@ -345,6 +345,13 @@ def __init__(
             ]
         )
 
+        self.instance_images = []
+
+        if len(self.instance_images_path) < 20:
+            # Load all the images into memory:
+            for f in self.instance_images_path:
+                self.instance_images.append(Image.open(f).convert("RGB"))
+
         print("Captions:")
         print(self.captions)
 
@@ -358,11 +365,14 @@ def __len__(self):
 
     def __getitem__(self, index):
         example = {}
-        instance_image = Image.open(
-            self.instance_images_path[index % self.num_instance_images]
-        )
-        if not instance_image.mode == "RGB":
-            instance_image = instance_image.convert("RGB")
+
+        if len(self.instance_images) > 0:
+            instance_image = self.instance_images[index % self.num_instance_images]
+        else:
+            instance_image = Image.open(
+                self.instance_images_path[index % self.num_instance_images]
+            ).convert("RGB")
+
         example["instance_images"] = self.image_transforms(instance_image)
 
         if self.train_inpainting: