From 909032cd718d864600c26e5e10f67105c52ecdbe Mon Sep 17 00:00:00 2001
From: Ramon <ramon@verrassendhollands.nl>
Date: Wed, 26 Feb 2025 08:40:50 +0100
Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20improve=20weight=20l?=
 =?UTF-8?q?oading=20logic?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/model/yolo.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/yolo/model/yolo.py b/yolo/model/yolo.py
index cc9ce20ba..bb0def33a 100644
--- a/yolo/model/yolo.py
+++ b/yolo/model/yolo.py
@@ -129,27 +129,35 @@ def save_load_weights(self, weights: Union[Path, OrderedDict]):
             weights = torch.load(weights, map_location=torch.device("cpu"), weights_only=False)
         if "model_state_dict" in weights:
             weights = weights["model_state_dict"]
+        if "state_dict" in weights:
+            weights = weights["state_dict"]
 
-        model_state_dict = self.model.state_dict()
+        model_state_dict = self.state_dict()
 
         # TODO1: autoload old version weight
         # TODO2: weight transform if num_class difference
 
         error_dict = {"Mismatch": set(), "Not Found": set()}
         for model_key, model_weight in model_state_dict.items():
-            if model_key not in weights:
+
+            weights_key = model_key
+            if weights_key not in weights:  #.ckpt
+                weights_key = "model." + model_key
+            if weights_key not in weights:  #.pt old
+                weights_key = model_key[6:]
+            if weights_key not in weights:
                 error_dict["Not Found"].add(tuple(model_key.split(".")[:-2]))
                 continue
-            if model_weight.shape != weights[model_key].shape:
+            if model_weight.shape != weights[weights_key].shape:
                 error_dict["Mismatch"].add(tuple(model_key.split(".")[:-2]))
                 continue
-            model_state_dict[model_key] = weights[model_key]
+            model_state_dict[model_key] = weights[weights_key]
 
         for error_name, error_set in error_dict.items():
             for weight_name in error_set:
                 logger.warning(f":warning: Weight {error_name} for key: {'.'.join(weight_name)}")
 
-        self.model.load_state_dict(model_state_dict)
+        self.load_state_dict(model_state_dict)
 
 
 def create_model(model_cfg: ModelConfig, weight_path: Union[bool, Path] = True, class_num: int = 80) -> YOLO:

From 126995d003c55b40b0bedb929bd6b2dd756ab57b Mon Sep 17 00:00:00 2001
From: Ramon <ramon@verrassendhollands.nl>
Date: Wed, 26 Feb 2025 08:41:00 +0100
Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20prevent=20KeyError?=
 =?UTF-8?q?=20by=20checking=20for=20'v=5Fnum'=20in=20metrics=20before=20re?=
 =?UTF-8?q?moval?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/utils/logging_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yolo/utils/logging_utils.py b/yolo/utils/logging_utils.py
index 28a536222..4713fa22c 100644
--- a/yolo/utils/logging_utils.py
+++ b/yolo/utils/logging_utils.py
@@ -107,7 +107,8 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch: Any, batch_idx:
         epoch_descript = "[cyan]Train [white]|"
         batch_descript = "[green]Train [white]|"
         metrics = self.get_metrics(trainer, pl_module)
-        metrics.pop("v_num")
+        if "v_num" in metrics:
+            metrics.pop("v_num")
         for metrics_name, metrics_val in metrics.items():
             if "Loss_step" in metrics_name:
                 epoch_descript += f"{metrics_name.removesuffix('_step').split('/')[1]: ^9}|"

From 847e014b6b621988cc77eee7a77e1561b618b409 Mon Sep 17 00:00:00 2001
From: Ramon <ramon@verrassendhollands.nl>
Date: Wed, 26 Feb 2025 08:41:14 +0100
Subject: [PATCH 3/7] =?UTF-8?q?=F0=9F=92=BE=20save=20predictions=20to=20a?=
 =?UTF-8?q?=20text=20file=20with=20bounding=20box=20details?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/tools/solver.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 8246a66d2..f696c7948 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -127,12 +127,26 @@ def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame = batch
         predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
+        
         if getattr(self.predict_loader, "is_stream", None):
             fps = self._display_stream(img)
         else:
             fps = None
+
         if getattr(self.cfg.task, "save_predict", None):
-            self._save_image(img, batch_idx)
+            # self._save_image(img, batch_idx)
+
+            output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt"
+
+            # save predics to file img.name .txt, space separated
+            with open(output_txt_file, 'wb') as f:
+                for bboxes in predicts:
+                    for bbox in bboxes:
+                        class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox]
+                        f.write(f"frame{batch_idx:03d} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n")
+
+            print(f"💾 Saved predictions at {output_txt_file}")
+            
         return img, fps
 
     def _save_image(self, img, batch_idx):

From afa6a7ecb27680bc334e5ab8f226c995d4dae234 Mon Sep 17 00:00:00 2001
From: Ramon <ramonhollands@gmail.com>
Date: Wed, 26 Feb 2025 12:04:12 +0100
Subject: [PATCH 4/7] include image_path.name in results.txt

---
 yolo/tools/data_loader.py | 14 ++++++++------
 yolo/tools/solver.py      |  8 ++++----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index c44f00c68..54df8c6ae 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -272,20 +272,22 @@ def process_image(self, image_path):
         image = Image.open(image_path).convert("RGB")
         if image is None:
             raise ValueError(f"Error loading image: {image_path}")
-        self.process_frame(image)
+        self.process_frame(image, image_path)
 
     def load_video_file(self, video_path):
         import cv2
 
         cap = cv2.VideoCapture(str(video_path))
+        frame_idx = 0
         while self.running:
             ret, frame = cap.read()
             if not ret:
                 break
-            self.process_frame(frame)
+            self.process_frame(frame, f"{video_path.stem}_frame{frame_idx:04d}.png")
+            frame_idx += 1
         cap.release()
 
-    def process_frame(self, frame):
+    def process_frame(self, frame, image_path):
         if isinstance(frame, np.ndarray):
             # TODO: we don't need cv2
             import cv2
@@ -297,9 +299,9 @@ def process_frame(self, frame):
         frame = frame[None]
         rev_tensor = rev_tensor[None]
         if not self.is_stream:
-            self.queue.put((frame, rev_tensor, origin_frame))
+            self.queue.put((frame, rev_tensor, origin_frame, image_path))
         else:
-            self.current_frame = (frame, rev_tensor, origin_frame)
+            self.current_frame = (frame, rev_tensor, origin_frame, image_path)
 
     def __iter__(self) -> Generator[Tensor, None, None]:
         return self
@@ -310,7 +312,7 @@ def __next__(self) -> Tensor:
             if not ret:
                 self.stop()
                 raise StopIteration
-            self.process_frame(frame)
+            self.process_frame(frame, "stream_frame.png")
             return self.current_frame
         else:
             try:
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index f509bd57d..5eec2db36 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -124,7 +124,7 @@ def predict_dataloader(self):
         return self.predict_loader
 
     def predict_step(self, batch, batch_idx):
-        images, rev_tensor, origin_frame = batch
+        images, rev_tensor, origin_frame, image_path = batch
         predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
         
@@ -134,16 +134,16 @@ def predict_step(self, batch, batch_idx):
             fps = None
 
         if getattr(self.cfg.task, "save_predict", None):
-            # self._save_image(img, batch_idx)
+            self._save_image(img, batch_idx)
 
             output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt"
 
             # save predics to file img.name .txt, space separated
-            with open(output_txt_file, 'wb') as f:
+            with open(output_txt_file, 'a') as f:
                 for bboxes in predicts:
                     for bbox in bboxes:
                         class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox]
-                        f.write(f"frame{batch_idx:03d} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n")
+                        f.write(f"{image_path.name} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n")
 
             print(f"💾 Saved predictions at {output_txt_file}")
             

From 6736fe18f93dc14a9ecbbc141df0afbf828a946c Mon Sep 17 00:00:00 2001
From: Ramon <ramonhollands@gmail.com>
Date: Thu, 27 Feb 2025 09:37:33 +0100
Subject: [PATCH 5/7] =?UTF-8?q?=F0=9F=A7=BC=20Clean=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/model/yolo.py   | 4 ++--
 yolo/tools/solver.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/yolo/model/yolo.py b/yolo/model/yolo.py
index 65a3df17e..7e3b1c044 100644
--- a/yolo/model/yolo.py
+++ b/yolo/model/yolo.py
@@ -147,9 +147,9 @@ def save_load_weights(self, weights: Union[Path, OrderedDict]):
         for model_key, model_weight in model_state_dict.items():
 
             weights_key = model_key
-            if weights_key not in weights:  #.ckpt
+            if weights_key not in weights:  # .ckpt
                 weights_key = "model." + model_key
-            if weights_key not in weights:  #.pt old
+            if weights_key not in weights:  # .pt old
                 weights_key = model_key[6:]
             if weights_key not in weights:
                 error_dict["Not Found"].add(tuple(model_key.split(".")[:-2]))
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 5eec2db36..544a842bb 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -127,7 +127,7 @@ def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame, image_path = batch
         predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
-        
+
         if getattr(self.predict_loader, "is_stream", None):
             fps = self._display_stream(img)
         else:
@@ -139,14 +139,14 @@ def predict_step(self, batch, batch_idx):
             output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt"
 
             # save predics to file img.name .txt, space separated
-            with open(output_txt_file, 'a') as f:
+            with open(output_txt_file, "a") as f:
                 for bboxes in predicts:
                     for bbox in bboxes:
                         class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox]
                         f.write(f"{image_path.name} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n")
 
             print(f"💾 Saved predictions at {output_txt_file}")
-            
+
         return img, fps
 
     def _save_image(self, img, batch_idx):

From df93b9a906e2422733fdae097d240205d851523b Mon Sep 17 00:00:00 2001
From: Ramon <ramon@verrassendhollands.nl>
Date: Fri, 28 Feb 2025 09:59:03 +0100
Subject: [PATCH 6/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20load=20images=20in?=
 =?UTF-8?q?=20sync,=20otherwise=20not=20all=20images=20are=20loaded=20on?=
 =?UTF-8?q?=20inference?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/tools/data_loader.py | 11 +++++++----
 yolo/tools/solver.py      |  6 ++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 54df8c6ae..a513dc443 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -234,7 +234,7 @@ def create_dataloader(data_cfg: DataConfig, dataset_cfg: DatasetConfig, task: st
 
 
 class StreamDataLoader:
-    def __init__(self, data_cfg: DataConfig):
+    def __init__(self, data_cfg: DataConfig, asynchronous: bool = True):
         self.source = data_cfg.source
         self.running = True
         self.is_stream = isinstance(self.source, int) or str(self.source).lower().startswith("rtmp://")
@@ -244,13 +244,16 @@ def __init__(self, data_cfg: DataConfig):
 
         if self.is_stream:
             import cv2
-
             self.cap = cv2.VideoCapture(self.source)
         else:
             self.source = Path(self.source)
             self.queue = Queue()
-            self.thread = Thread(target=self.load_source)
-            self.thread.start()
+
+            if asynchronous:
+                self.thread = Thread(target=self.load_source)
+                self.thread.start()
+            else:
+                self.load_source()
 
     def load_source(self):
         if self.source.is_dir():  # image folder
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 5eec2db36..332e960e9 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -6,7 +6,7 @@
 
 from yolo.config.config import Config
 from yolo.model.yolo import create_model
-from yolo.tools.data_loader import create_dataloader
+from yolo.tools.data_loader import StreamDataLoader, create_dataloader
 from yolo.tools.drawer import draw_bboxes
 from yolo.tools.loss_functions import create_loss_function
 from yolo.utils.bounding_box_utils import create_converter, to_metrics_format
@@ -112,7 +112,9 @@ def __init__(self, cfg: Config):
         super().__init__(cfg)
         self.cfg = cfg
         # TODO: Add FastModel
-        self.predict_loader = create_dataloader(cfg.task.data, cfg.dataset, cfg.task.task)
+        # StreamDataLoader has to be synchronous, otherwise not all images are loaded
+        # TODO: Make this load in parallel
+        self.predict_loader = StreamDataLoader(cfg.task.data, asynchronous=False)
 
     def setup(self, stage):
         self.vec2box = create_converter(

From a128cf117653c276773c7cab6e08079a2f703301 Mon Sep 17 00:00:00 2001
From: Ramon <ramonhollands@gmail.com>
Date: Fri, 28 Feb 2025 10:47:28 +0100
Subject: [PATCH 7/7] =?UTF-8?q?=F0=9F=A7=BC=20Clean=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 yolo/tools/data_loader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index a513dc443..bf2903f17 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -244,6 +244,7 @@ def __init__(self, data_cfg: DataConfig, asynchronous: bool = True):
 
         if self.is_stream:
             import cv2
+
             self.cap = cv2.VideoCapture(self.source)
         else:
             self.source = Path(self.source)