From 909032cd718d864600c26e5e10f67105c52ecdbe Mon Sep 17 00:00:00 2001 From: Ramon Date: Wed, 26 Feb 2025 08:40:50 +0100 Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20improve=20weight=20l?= =?UTF-8?q?oading=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/model/yolo.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/yolo/model/yolo.py b/yolo/model/yolo.py index cc9ce20ba..bb0def33a 100644 --- a/yolo/model/yolo.py +++ b/yolo/model/yolo.py @@ -129,27 +129,35 @@ def save_load_weights(self, weights: Union[Path, OrderedDict]): weights = torch.load(weights, map_location=torch.device("cpu"), weights_only=False) if "model_state_dict" in weights: weights = weights["model_state_dict"] + if "state_dict" in weights: + weights = weights["state_dict"] - model_state_dict = self.model.state_dict() + model_state_dict = self.state_dict() # TODO1: autoload old version weight # TODO2: weight transform if num_class difference error_dict = {"Mismatch": set(), "Not Found": set()} for model_key, model_weight in model_state_dict.items(): - if model_key not in weights: + + weights_key = model_key + if weights_key not in weights: #.ckpt + weights_key = "model." + model_key + if weights_key not in weights: #.pt old + weights_key = model_key[6:] + if weights_key not in weights: error_dict["Not Found"].add(tuple(model_key.split(".")[:-2])) continue - if model_weight.shape != weights[model_key].shape: + if model_weight.shape != weights[weights_key].shape: error_dict["Mismatch"].add(tuple(model_key.split(".")[:-2])) continue - model_state_dict[model_key] = weights[model_key] + model_state_dict[model_key] = weights[weights_key] for error_name, error_set in error_dict.items(): for weight_name in error_set: logger.warning(f":warning: Weight {error_name} for key: {'.'.join(weight_name)}") - self.model.load_state_dict(model_state_dict) + self.load_state_dict(model_state_dict) def create_model(model_cfg: ModelConfig, weight_path: Union[bool, Path] = True, class_num: int = 80) -> YOLO: From 126995d003c55b40b0bedb929bd6b2dd756ab57b Mon Sep 17 00:00:00 2001 From: Ramon Date: Wed, 26 Feb 2025 08:41:00 +0100 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20prevent=20KeyError?= =?UTF-8?q?=20by=20checking=20for=20'v=5Fnum'=20in=20metrics=20before=20re?= =?UTF-8?q?moval?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/utils/logging_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yolo/utils/logging_utils.py b/yolo/utils/logging_utils.py index 28a536222..4713fa22c 100644 --- a/yolo/utils/logging_utils.py +++ b/yolo/utils/logging_utils.py @@ -107,7 +107,8 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch: Any, batch_idx: epoch_descript = "[cyan]Train [white]|" batch_descript = "[green]Train [white]|" metrics = self.get_metrics(trainer, pl_module) - metrics.pop("v_num") + if "v_num" in metrics: + metrics.pop("v_num") for metrics_name, metrics_val in metrics.items(): if "Loss_step" in metrics_name: epoch_descript += f"{metrics_name.removesuffix('_step').split('/')[1]: ^9}|" From 847e014b6b621988cc77eee7a77e1561b618b409 Mon Sep 17 00:00:00 2001 From: Ramon Date: Wed, 26 Feb 2025 08:41:14 +0100 Subject: [PATCH 3/7] =?UTF-8?q?=F0=9F=92=BE=20save=20predictions=20to=20a?= =?UTF-8?q?=20text=20file=20with=20bounding=20box=20details?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/tools/solver.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index 8246a66d2..f696c7948 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -127,12 +127,26 @@ def predict_step(self, batch, batch_idx): images, rev_tensor, origin_frame = batch predicts = self.post_process(self(images), rev_tensor=rev_tensor) img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list) + if getattr(self.predict_loader, "is_stream", None): fps = self._display_stream(img) else: fps = None + if getattr(self.cfg.task, "save_predict", None): - self._save_image(img, batch_idx) + # self._save_image(img, batch_idx) + + output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt" + + # save predics to file img.name .txt, space separated + with open(output_txt_file, 'wb') as f: + for bboxes in predicts: + for bbox in bboxes: + class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox] + f.write(f"frame{batch_idx:03d} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n") + + print(f"💾 Saved predictions at {output_txt_file}") + return img, fps def _save_image(self, img, batch_idx): From afa6a7ecb27680bc334e5ab8f226c995d4dae234 Mon Sep 17 00:00:00 2001 From: Ramon Date: Wed, 26 Feb 2025 12:04:12 +0100 Subject: [PATCH 4/7] include image_path.name in results.txt --- yolo/tools/data_loader.py | 14 ++++++++------ yolo/tools/solver.py | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py index c44f00c68..54df8c6ae 100644 --- a/yolo/tools/data_loader.py +++ b/yolo/tools/data_loader.py @@ -272,20 +272,22 @@ def process_image(self, image_path): image = Image.open(image_path).convert("RGB") if image is None: raise ValueError(f"Error loading image: {image_path}") - self.process_frame(image) + self.process_frame(image, image_path) def load_video_file(self, video_path): import cv2 cap = cv2.VideoCapture(str(video_path)) + frame_idx = 0 while self.running: ret, frame = cap.read() if not ret: break - self.process_frame(frame) + self.process_frame(frame, f"{video_path.stem}_frame{frame_idx:04d}.png") + frame_idx += 1 cap.release() - def process_frame(self, frame): + def process_frame(self, frame, image_path): if isinstance(frame, np.ndarray): # TODO: we don't need cv2 import cv2 @@ -297,9 +299,9 @@ def process_frame(self, frame): frame = frame[None] rev_tensor = rev_tensor[None] if not self.is_stream: - self.queue.put((frame, rev_tensor, origin_frame)) + self.queue.put((frame, rev_tensor, origin_frame, image_path)) else: - self.current_frame = (frame, rev_tensor, origin_frame) + self.current_frame = (frame, rev_tensor, origin_frame, image_path) def __iter__(self) -> Generator[Tensor, None, None]: return self @@ -310,7 +312,7 @@ def __next__(self) -> Tensor: if not ret: self.stop() raise StopIteration - self.process_frame(frame) + self.process_frame(frame, "stream_frame.png") return self.current_frame else: try: diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index f509bd57d..5eec2db36 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -124,7 +124,7 @@ def predict_dataloader(self): return self.predict_loader def predict_step(self, batch, batch_idx): - images, rev_tensor, origin_frame = batch + images, rev_tensor, origin_frame, image_path = batch predicts = self.post_process(self(images), rev_tensor=rev_tensor) img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list) @@ -134,16 +134,16 @@ def predict_step(self, batch, batch_idx): fps = None if getattr(self.cfg.task, "save_predict", None): - # self._save_image(img, batch_idx) + self._save_image(img, batch_idx) output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt" # save predics to file img.name .txt, space separated - with open(output_txt_file, 'wb') as f: + with open(output_txt_file, 'a') as f: for bboxes in predicts: for bbox in bboxes: class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox] - f.write(f"frame{batch_idx:03d} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n") + f.write(f"{image_path.name} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n") print(f"💾 Saved predictions at {output_txt_file}") From 6736fe18f93dc14a9ecbbc141df0afbf828a946c Mon Sep 17 00:00:00 2001 From: Ramon Date: Thu, 27 Feb 2025 09:37:33 +0100 Subject: [PATCH 5/7] =?UTF-8?q?=F0=9F=A7=BC=20Clean=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/model/yolo.py | 4 ++-- yolo/tools/solver.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/yolo/model/yolo.py b/yolo/model/yolo.py index 65a3df17e..7e3b1c044 100644 --- a/yolo/model/yolo.py +++ b/yolo/model/yolo.py @@ -147,9 +147,9 @@ def save_load_weights(self, weights: Union[Path, OrderedDict]): for model_key, model_weight in model_state_dict.items(): weights_key = model_key - if weights_key not in weights: #.ckpt + if weights_key not in weights: # .ckpt weights_key = "model." + model_key - if weights_key not in weights: #.pt old + if weights_key not in weights: # .pt old weights_key = model_key[6:] if weights_key not in weights: error_dict["Not Found"].add(tuple(model_key.split(".")[:-2])) diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index 5eec2db36..544a842bb 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -127,7 +127,7 @@ def predict_step(self, batch, batch_idx): images, rev_tensor, origin_frame, image_path = batch predicts = self.post_process(self(images), rev_tensor=rev_tensor) img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list) - + if getattr(self.predict_loader, "is_stream", None): fps = self._display_stream(img) else: @@ -139,14 +139,14 @@ def predict_step(self, batch, batch_idx): output_txt_file = Path(getattr(self.cfg, "out_path")) / f"results.txt" # save predics to file img.name .txt, space separated - with open(output_txt_file, 'a') as f: + with open(output_txt_file, "a") as f: for bboxes in predicts: for bbox in bboxes: class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox] f.write(f"{image_path.name} {int(class_id)} {x_min} {y_min} {x_max} {y_max} {conf[0]}\n") print(f"💾 Saved predictions at {output_txt_file}") - + return img, fps def _save_image(self, img, batch_idx): From df93b9a906e2422733fdae097d240205d851523b Mon Sep 17 00:00:00 2001 From: Ramon Date: Fri, 28 Feb 2025 09:59:03 +0100 Subject: [PATCH 6/7] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20load=20images=20in?= =?UTF-8?q?=20sync,=20otherwise=20not=20all=20images=20are=20loaded=20on?= =?UTF-8?q?=20inference?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/tools/data_loader.py | 11 +++++++---- yolo/tools/solver.py | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py index 54df8c6ae..a513dc443 100644 --- a/yolo/tools/data_loader.py +++ b/yolo/tools/data_loader.py @@ -234,7 +234,7 @@ def create_dataloader(data_cfg: DataConfig, dataset_cfg: DatasetConfig, task: st class StreamDataLoader: - def __init__(self, data_cfg: DataConfig): + def __init__(self, data_cfg: DataConfig, asynchronous: bool = True): self.source = data_cfg.source self.running = True self.is_stream = isinstance(self.source, int) or str(self.source).lower().startswith("rtmp://") @@ -244,13 +244,16 @@ def __init__(self, data_cfg: DataConfig): if self.is_stream: import cv2 - self.cap = cv2.VideoCapture(self.source) else: self.source = Path(self.source) self.queue = Queue() - self.thread = Thread(target=self.load_source) - self.thread.start() + + if asynchronous: + self.thread = Thread(target=self.load_source) + self.thread.start() + else: + self.load_source() def load_source(self): if self.source.is_dir(): # image folder diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index 5eec2db36..332e960e9 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -6,7 +6,7 @@ from yolo.config.config import Config from yolo.model.yolo import create_model -from yolo.tools.data_loader import create_dataloader +from yolo.tools.data_loader import StreamDataLoader, create_dataloader from yolo.tools.drawer import draw_bboxes from yolo.tools.loss_functions import create_loss_function from yolo.utils.bounding_box_utils import create_converter, to_metrics_format @@ -112,7 +112,9 @@ def __init__(self, cfg: Config): super().__init__(cfg) self.cfg = cfg # TODO: Add FastModel - self.predict_loader = create_dataloader(cfg.task.data, cfg.dataset, cfg.task.task) + # StreamDataLoader has to be synchronous, otherwise not all images are loaded + # TODO: Make this load in parallel + self.predict_loader = StreamDataLoader(cfg.task.data, asynchronous=False) def setup(self, stage): self.vec2box = create_converter( From a128cf117653c276773c7cab6e08079a2f703301 Mon Sep 17 00:00:00 2001 From: Ramon Date: Fri, 28 Feb 2025 10:47:28 +0100 Subject: [PATCH 7/7] =?UTF-8?q?=F0=9F=A7=BC=20Clean=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/tools/data_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py index a513dc443..bf2903f17 100644 --- a/yolo/tools/data_loader.py +++ b/yolo/tools/data_loader.py @@ -244,6 +244,7 @@ def __init__(self, data_cfg: DataConfig, asynchronous: bool = True): if self.is_stream: import cv2 + self.cap = cv2.VideoCapture(self.source) else: self.source = Path(self.source)