From 0b6f55f3d7748be50f0336e9a7a6497c9279bc58 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Thu, 15 Aug 2024 02:11:58 -0400
Subject: [PATCH 1/7] refactor: dataset_utils.create_image_metadata now returns
 annotations_dict and image_info_dict with image file name as key to ensure
 uniform key across the code base.

refactor: dataset_utils.map_annotations_to_image_names returns annotations list mapped to image file names instead of image_id.

refactor: several variable names made more descriptive.

docs: docstrings updated.
---
 yolo/utils/dataset_utils.py | 71 +++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 26 deletions(-)

diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index a6c6e1fd..3c5881a3 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -39,45 +39,64 @@ def locate_label_paths(dataset_path: Path, phase_name: Path) -> Tuple[Path, Path
 
 def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:
     """
-    Create a dictionary containing image information and annotations indexed by image ID.
+    Create a dictionary containing image information and annotations
+    indexed by image name.
+
+    Image name is the file name of the image including the extension.
 
     Args:
         labels_path (str): The path to the annotation json file.
 
     Returns:
-        - annotations_index: A dictionary where keys are image IDs and values are lists of annotations.
-        - image_info_dict: A dictionary where keys are image file names without extension and values are image information dictionaries.
+        A Tuple of annotations_dict and image_info_dict.
+        annotations_dict is a dictionary where keys are image names and values
+        are lists of annotations.
+        image_info_dict is a dictionary where keys are image file names and
+        values are image information dictionaries.
     """
     with open(labels_path, "r") as file:
-        labels_data = json.load(file)
-        id_to_idx = discretize_categories(labels_data.get("categories", [])) if "categories" in labels_data else None
-        annotations_index = organize_annotations_by_image(labels_data, id_to_idx)  # check lookup is a good name?
-        image_info_dict = {Path(img["file_name"]).stem: img for img in labels_data["images"]}
-        return annotations_index, image_info_dict
-
-
-def organize_annotations_by_image(data: Dict[str, Any], id_to_idx: Optional[Dict[int, int]]):
+        json_data = json.load(file)
+        image_id_to_file_name_dict = {
+            img['id'] : Path(img["file_name"]).name for img in json_data["images"]
+        }
+        # TODO: id_to_idx is unnecessary. `idx = id - 1`` in coco as category_id starts from 1.
+        # what if we had 1M images? Unnecessary!
+        id_to_idx = discretize_categories(json_data.get("categories", [])) if "categories" in json_data else None
+        annotations_dict = map_annotations_to_image_names(json_data, id_to_idx, image_id_to_file_name_dict)  # check lookup is a good name?
+        image_info_dict = {Path(img["file_name"]).name: img for img in json_data["images"]}
+        return annotations_dict, image_info_dict
+
+
+def map_annotations_to_image_names(
+        json_data: Dict[str, Any],
+        category_id_to_idx: Optional[Dict[int, int]],
+        image_id_to_image_name:dict[int, str]
+) -> dict[str, list[dict]]:
     """
-    Use image index to lookup every annotations
+    Returns a dict mapping image file names to a list of all corresponding annotations.
     Args:
-        data (Dict[str, Any]): A dictionary containing annotation data.
+        json_data: Data read from a COCO json file.
+        category_id_to_idx: For COCO dataset, a dict mapping from category_id
+            to (category_id - 1).  # TODO: depricate?
+        image_id_to_image_name: Dict mapping image_id to image_file name. 
 
     Returns:
-        Dict[int, List[Dict[str, Any]]]: A dictionary where keys are image IDs and values are lists of annotations.
-        Annotations with "iscrowd" set to True are excluded from the index.
-
+        image_name_to_annotation_dict_list: A dictionary where keys are image IDs
+            and values are lists of annotation dictionaries.
+            Annotations with "iscrowd" set to True, are excluded.
     """
-    annotation_lookup = {}
-    for anno in data["annotations"]:
-        if anno["iscrowd"]:
+    image_name_to_annotation_dict_list = {}
+    for annotation_dict in json_data["annotations"]:
+        if annotation_dict["iscrowd"]:
             continue
-        image_id = anno["image_id"]
-        if id_to_idx:
-            anno["category_id"] = id_to_idx[anno["category_id"]]
-        if image_id not in annotation_lookup:
-            annotation_lookup[image_id] = []
-        annotation_lookup[image_id].append(anno)
-    return annotation_lookup
+        image_id = annotation_dict["image_id"]
+        image_name = image_id_to_image_name[image_id]
+        if category_id_to_idx:
+            annotation_dict["category_id"] = category_id_to_idx[annotation_dict["category_id"]]
+        if image_name not in image_name_to_annotation_dict_list:
+            image_name_to_annotation_dict_list[image_name] = []
+        image_name_to_annotation_dict_list[image_name].append(annotation_dict)
+    return image_name_to_annotation_dict_list
 
 
 def scale_segmentation(

From f3d5008ef2cc7b55700b1d74afa941084091ea65 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Thu, 15 Aug 2024 02:12:45 -0400
Subject: [PATCH 2/7] refactor: data_loader.filter_data now returns data with
 image_name instead of image_path as the key.

refactor: annotations_index renamed to annotations_dict.
---
 yolo/tools/data_loader.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 785235a9..0874c45d 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -71,7 +71,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         labels_path, data_type = locate_label_paths(dataset_path, phase_name)
         images_list = sorted([p.name for p in Path(images_path).iterdir() if p.is_file()])
         if data_type == "json":
-            annotations_index, image_info_dict = create_image_metadata(labels_path)
+            annotations_dict, image_info_dict = create_image_metadata(labels_path)
 
         data = []
         valid_inputs = 0
@@ -81,10 +81,10 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
             image_id = Path(image_name).stem
 
             if data_type == "json":
-                image_info = image_info_dict.get(image_id, None)
+                image_info = image_info_dict.get(image_name, None)
                 if image_info is None:
                     continue
-                annotations = annotations_index.get(image_info["id"], [])
+                annotations = annotations_dict.get(image_name, [])
                 image_seg_annotations = scale_segmentation(annotations, image_info)
                 if not image_seg_annotations:
                     continue
@@ -99,9 +99,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                 image_seg_annotations = []
 
             labels = self.load_valid_labels(image_id, image_seg_annotations)
-
-            img_path = images_path / image_name
-            data.append((img_path, labels))
+            data.append((image_name, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data

From 87e7c467b19ef6900ab68a12762f9aea7ea3aaa3 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Thu, 15 Aug 2024 23:19:09 -0400
Subject: [PATCH 3/7] refactor: switched keys from image_names with extensions
 to without extensions.

---
 yolo/tools/data_loader.py   | 10 ++++++----
 yolo/utils/dataset_utils.py | 13 ++++++-------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 0874c45d..ea64a729 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -65,7 +65,8 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
             labels_path (str): Path to the directory containing label files.
 
         Returns:
-            list: A list of tuples, each containing the path to an image file and its associated segmentation as a tensor.
+            list: A list of tuples, each containing the path to an image file
+                and its associated segmentation as a tensor.
         """
         images_path = dataset_path / "images" / phase_name
         labels_path, data_type = locate_label_paths(dataset_path, phase_name)
@@ -81,10 +82,10 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
             image_id = Path(image_name).stem
 
             if data_type == "json":
-                image_info = image_info_dict.get(image_name, None)
+                image_info = image_info_dict.get(image_id, None)
                 if image_info is None:
                     continue
-                annotations = annotations_dict.get(image_name, [])
+                annotations = annotations_dict.get(image_id, [])
                 image_seg_annotations = scale_segmentation(annotations, image_info)
                 if not image_seg_annotations:
                     continue
@@ -99,7 +100,8 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                 image_seg_annotations = []
 
             labels = self.load_valid_labels(image_id, image_seg_annotations)
-            data.append((image_name, labels))
+            image_path = images_path / image_name
+            data.append((image_path, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index 3c5881a3..c4810c88 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -40,30 +40,29 @@ def locate_label_paths(dataset_path: Path, phase_name: Path) -> Tuple[Path, Path
 def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:
     """
     Create a dictionary containing image information and annotations
-    indexed by image name.
-
-    Image name is the file name of the image including the extension.
+    both indexed by image id. Image id is the file name without the extension.
+    It is not the same as the int image id saved in coco .json files.
 
     Args:
         labels_path (str): The path to the annotation json file.
 
     Returns:
         A Tuple of annotations_dict and image_info_dict.
-        annotations_dict is a dictionary where keys are image names and values
+        annotations_dict is a dictionary where keys are image ids and values
         are lists of annotations.
-        image_info_dict is a dictionary where keys are image file names and
+        image_info_dict is a dictionary where keys are image file id and
         values are image information dictionaries.
     """
     with open(labels_path, "r") as file:
         json_data = json.load(file)
         image_id_to_file_name_dict = {
-            img['id'] : Path(img["file_name"]).name for img in json_data["images"]
+            img['id'] : Path(img["file_name"]).stem for img in json_data["images"]
         }
         # TODO: id_to_idx is unnecessary. `idx = id - 1`` in coco as category_id starts from 1.
         # what if we had 1M images? Unnecessary!
         id_to_idx = discretize_categories(json_data.get("categories", [])) if "categories" in json_data else None
         annotations_dict = map_annotations_to_image_names(json_data, id_to_idx, image_id_to_file_name_dict)  # check lookup is a good name?
-        image_info_dict = {Path(img["file_name"]).name: img for img in json_data["images"]}
+        image_info_dict = {Path(img["file_name"]).stem: img for img in json_data["images"]}
         return annotations_dict, image_info_dict
 
 

From d4a4718aa61aa91c910606e350e8af460673b4d4 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Sun, 18 Aug 2024 20:01:26 -0400
Subject: [PATCH 4/7] fix: dataloader now returns image_id too, and validation
 works even with non-int-convertible image names.

---
 yolo/tools/data_loader.py   | 25 ++++++++++++-------------
 yolo/tools/solver.py        |  4 ++--
 yolo/utils/dataset_utils.py | 24 ++++++++++--------------
 yolo/utils/model_utils.py   |  6 +++---
 4 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index ea64a729..704aee03 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -72,16 +72,14 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         labels_path, data_type = locate_label_paths(dataset_path, phase_name)
         images_list = sorted([p.name for p in Path(images_path).iterdir() if p.is_file()])
         if data_type == "json":
-            annotations_dict, image_info_dict = create_image_metadata(labels_path)
-
+            annotations_dict, image_info_dict, image_name_to_id_dict = create_image_metadata(labels_path)
         data = []
         valid_inputs = 0
         for image_name in track(images_list, description="Filtering data"):
             if not image_name.lower().endswith((".jpg", ".jpeg", ".png")):
                 continue
-            image_id = Path(image_name).stem
-
             if data_type == "json":
+                image_id = image_name_to_id_dict[image_name]
                 image_info = image_info_dict.get(image_id, None)
                 if image_info is None:
                     continue
@@ -91,6 +89,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                     continue
 
             elif data_type == "txt":
+                image_id = Path(image_name).stem
                 label_path = labels_path / f"{image_id}.txt"
                 if not label_path.is_file():
                     continue
@@ -101,12 +100,12 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
 
             labels = self.load_valid_labels(image_id, image_seg_annotations)
             image_path = images_path / image_name
-            data.append((image_path, labels))
+            data.append((image_id, image_path, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
 
-    def load_valid_labels(self, label_path: str, seg_data_one_img: list) -> Union[Tensor, None]:
+    def load_valid_labels(self, image_id: str, seg_data_one_img: list) -> Union[Tensor, None]:
         """
         Loads and validates bounding box data is [0, 1] from a label file.
 
@@ -128,22 +127,22 @@ def load_valid_labels(self, label_path: str, seg_data_one_img: list) -> Union[Te
         if bboxes:
             return torch.stack(bboxes)
         else:
-            logger.warning("No valid BBox in {}", label_path)
+            logger.warning("No valid BBox in image id:{}", image_id)
             return torch.zeros((0, 5))
 
     def get_data(self, idx):
-        img_path, bboxes = self.data[idx]
+        image_id, img_path, bboxes = self.data[idx]
         img = Image.open(img_path).convert("RGB")
-        return img, bboxes, img_path
+        return img, bboxes, image_id
 
     def get_more_data(self, num: int = 1):
         indices = torch.randint(0, len(self), (num,))
         return [self.get_data(idx)[:2] for idx in indices]
 
     def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
-        img, bboxes, img_path = self.get_data(idx)
+        img, bboxes, image_id = self.get_data(idx)
         img, bboxes, rev_tensor = self.transform(img, bboxes)
-        return img, bboxes, rev_tensor, img_path
+        return img, bboxes, rev_tensor, image_id
 
     def __len__(self) -> int:
         return len(self.data)
@@ -189,11 +188,11 @@ def collate_fn(self, batch: List[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, List[T
             batch_targets[idx, : min(target_size, 100)] = batch[idx][1][:100]
         batch_targets[:, :, 1:] *= self.image_size
 
-        batch_images, _, batch_reverse, batch_path = zip(*batch)
+        batch_images, _, batch_reverse, batch_image_ids = zip(*batch)
         batch_images = torch.stack(batch_images)
         batch_reverse = torch.stack(batch_reverse)
 
-        return batch_size, batch_images, batch_targets, batch_reverse, batch_path
+        return batch_size, batch_images, batch_targets, batch_reverse, batch_image_ids
 
 
 def create_dataloader(data_cfg: DataConfig, dataset_cfg: DatasetConfig, task: str = "train", use_ddp: bool = False):
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 51ceffc0..d0a69f9a 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -237,7 +237,7 @@ def solve(self, dataloader, epoch_idx=1):
         self.model.eval()
         predict_json, mAPs = [], defaultdict(list)
         self.progress.start_one_epoch(len(dataloader), task="Validate")
-        for batch_size, images, targets, rev_tensor, img_paths in dataloader:
+        for batch_size, images, targets, rev_tensor, image_ids in dataloader:
             images, targets, rev_tensor = images.to(self.device), targets.to(self.device), rev_tensor.to(self.device)
             with torch.no_grad():
                 predicts = self.model(images)
@@ -250,7 +250,7 @@ def solve(self, dataloader, epoch_idx=1):
             avg_mAPs = {key: 100 * torch.mean(torch.stack(val)) for key, val in mAPs.items()}
             self.progress.one_batch(avg_mAPs)
 
-            predict_json.extend(predicts_to_json(img_paths, predicts, rev_tensor))
+            predict_json.extend(predicts_to_json(image_ids, predicts, rev_tensor))
         self.progress.finish_one_epoch(avg_mAPs, epoch_idx=epoch_idx)
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
 
diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index c4810c88..573ae3bf 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -55,21 +55,20 @@ def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str,
     """
     with open(labels_path, "r") as file:
         json_data = json.load(file)
-        image_id_to_file_name_dict = {
-            img['id'] : Path(img["file_name"]).stem for img in json_data["images"]
+        image_name_to_id_dict = {
+            Path(img["file_name"]).name: img['id'] for img in json_data["images"]
         }
         # TODO: id_to_idx is unnecessary. `idx = id - 1`` in coco as category_id starts from 1.
         # what if we had 1M images? Unnecessary!
         id_to_idx = discretize_categories(json_data.get("categories", [])) if "categories" in json_data else None
-        annotations_dict = map_annotations_to_image_names(json_data, id_to_idx, image_id_to_file_name_dict)  # check lookup is a good name?
-        image_info_dict = {Path(img["file_name"]).stem: img for img in json_data["images"]}
-        return annotations_dict, image_info_dict
+        annotations_dict = map_annotations_to_image_names(json_data, id_to_idx)  # check lookup is a good name?
+        image_info_dict = {img['id']: img for img in json_data["images"]}
+        return annotations_dict, image_info_dict, image_name_to_id_dict
 
 
 def map_annotations_to_image_names(
         json_data: Dict[str, Any],
         category_id_to_idx: Optional[Dict[int, int]],
-        image_id_to_image_name:dict[int, str]
 ) -> dict[str, list[dict]]:
     """
     Returns a dict mapping image file names to a list of all corresponding annotations.
@@ -77,25 +76,22 @@ def map_annotations_to_image_names(
         json_data: Data read from a COCO json file.
         category_id_to_idx: For COCO dataset, a dict mapping from category_id
             to (category_id - 1).  # TODO: depricate?
-        image_id_to_image_name: Dict mapping image_id to image_file name. 
-
     Returns:
         image_name_to_annotation_dict_list: A dictionary where keys are image IDs
             and values are lists of annotation dictionaries.
             Annotations with "iscrowd" set to True, are excluded.
     """
-    image_name_to_annotation_dict_list = {}
+    image_id_to_annotation_dict_list = {}
     for annotation_dict in json_data["annotations"]:
         if annotation_dict["iscrowd"]:
             continue
         image_id = annotation_dict["image_id"]
-        image_name = image_id_to_image_name[image_id]
         if category_id_to_idx:
             annotation_dict["category_id"] = category_id_to_idx[annotation_dict["category_id"]]
-        if image_name not in image_name_to_annotation_dict_list:
-            image_name_to_annotation_dict_list[image_name] = []
-        image_name_to_annotation_dict_list[image_name].append(annotation_dict)
-    return image_name_to_annotation_dict_list
+        if image_id not in image_id_to_annotation_dict_list:
+            image_id_to_annotation_dict_list[image_id] = []
+        image_id_to_annotation_dict_list[image_id].append(annotation_dict)
+    return image_id_to_annotation_dict_list
 
 
 def scale_segmentation(
diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index c35b6009..7a9e7941 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -160,19 +160,19 @@ def collect_prediction(predict_json: List, local_rank: int) -> List:
     return predict_json
 
 
-def predicts_to_json(img_paths, predicts, rev_tensor):
+def predicts_to_json(image_ids, predicts, rev_tensor):
     """
     TODO: function document
     turn a batch of imagepath and predicts(n x 6 for each image) to a List of diction(Detection output)
     """
     batch_json = []
-    for img_path, bboxes, box_reverse in zip(img_paths, predicts, rev_tensor):
+    for image_id, bboxes, box_reverse in zip(image_ids, predicts, rev_tensor):
         scale, shift = box_reverse.split([1, 4])
         bboxes[:, 1:5] = (bboxes[:, 1:5] - shift[None]) / scale[None]
         bboxes[:, 1:5] = transform_bbox(bboxes[:, 1:5], "xyxy -> xywh")
         for cls, *pos, conf in bboxes:
             bbox = {
-                "image_id": int(Path(img_path).stem),
+                "image_id": image_id,
                 "category_id": IDX_TO_ID[int(cls)],
                 "bbox": [float(p) for p in pos],
                 "score": float(conf),

From ceff4ee502876ebfc9ea17c6dfa27ceb4e3b94b6 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Sun, 18 Aug 2024 21:58:32 -0400
Subject: [PATCH 5/7] docs: updated docstrings.

---
 yolo/tools/data_loader.py   | 26 ++++++++++++++++-------
 yolo/utils/dataset_utils.py | 42 +++++++++++++++++++++----------------
 yolo/utils/model_utils.py   | 17 ++++++++++++---
 3 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 704aee03..44179c92 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -34,7 +34,7 @@ def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str
         self.transform.get_more_data = self.get_more_data
         self.data = self.load_data(Path(dataset_cfg.path), phase_name)
 
-    def load_data(self, dataset_path: Path, phase_name: str):
+    def load_data(self, dataset_path: Path, phase_name: str) -> list:
         """
         Loads data from a cache or generates a new cache for a specific dataset phase.
 
@@ -43,7 +43,7 @@ def load_data(self, dataset_path: Path, phase_name: str):
             phase_name (str): The specific phase of the dataset (e.g., 'train', 'test') to load or generate data for.
 
         Returns:
-            dict: The loaded data from the cache for the specified phase.
+            list: The loaded data from the cache for the specified phase.
         """
         cache_path = dataset_path / f"{phase_name}.cache"
 
@@ -58,21 +58,31 @@ def load_data(self, dataset_path: Path, phase_name: str):
 
     def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         """
-        Filters and collects dataset information by pairing images with their corresponding labels.
+        Filters and collects dataset information by pairing images with
+        their corresponding labels.
 
         Parameters:
-            images_path (Path): Path to the directory containing image files.
-            labels_path (str): Path to the directory containing label files.
+            dataset_path (Path): The root path to the dataset directory.
+            phase_name (str): The specific phase of the dataset
+                (e.g., 'train', 'test') to load or generate data for.
 
         Returns:
-            list: A list of tuples, each containing the path to an image file
-                and its associated segmentation as a tensor.
+            list: A list of tuples, each containing image id, path to an image file
+                and its associated segmentation as a tensor. For COCO formatted .json
+                files, image id is the `int` `image_id` attribute for each annotation
+                in the json file.
+                For YOLO formatted .txt files, image id is the image file name without
+                the extension.
         """
         images_path = dataset_path / "images" / phase_name
         labels_path, data_type = locate_label_paths(dataset_path, phase_name)
         images_list = sorted([p.name for p in Path(images_path).iterdir() if p.is_file()])
         if data_type == "json":
-            annotations_dict, image_info_dict, image_name_to_id_dict = create_image_metadata(labels_path)
+            (
+                annotations_dict,
+                image_info_dict,
+                image_name_to_id_dict
+            ) = create_image_metadata(labels_path)
         data = []
         valid_inputs = 0
         for image_name in track(images_list, description="Filtering data"):
diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index 573ae3bf..9d6bb47f 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -37,49 +37,55 @@ def locate_label_paths(dataset_path: Path, phase_name: Path) -> Tuple[Path, Path
     return [], None
 
 
-def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:
+def create_image_metadata(
+        labels_path: str
+) -> Tuple[Dict[int, List], Dict[int, Dict], Dict[str, int]]:
     """
-    Create a dictionary containing image information and annotations
-    both indexed by image id. Image id is the file name without the extension.
-    It is not the same as the int image id saved in coco .json files.
+    Returnes three dictionaries mapping image id to list of annotations, 
+    image id to image information, and image name to image id.
+    Image id is the `int` `id` assigned to a image in the COCO formatted .json file.
 
     Args:
         labels_path (str): The path to the annotation json file.
 
     Returns:
-        A Tuple of annotations_dict and image_info_dict.
-        annotations_dict is a dictionary where keys are image ids and values
-        are lists of annotations.
-        image_info_dict is a dictionary where keys are image file id and
-        values are image information dictionaries.
+        (annotations_dict, image_info_dict, image_name_to_id_dict):
+            annotations_dict is a dictionary where keys are image ids and values
+            are lists of annotation dictionaries.
+            image_info_dict is a dictionary where keys are image ids and
+            values are image information dictionaries.
+            image_name_to_id_dict is a dictionary with image file name
+            (including extension) as key and int image id as value.
     """
     with open(labels_path, "r") as file:
         json_data = json.load(file)
         image_name_to_id_dict = {
             Path(img["file_name"]).name: img['id'] for img in json_data["images"]
         }
-        # TODO: id_to_idx is unnecessary. `idx = id - 1`` in coco as category_id starts from 1.
-        # what if we had 1M images? Unnecessary!
         id_to_idx = discretize_categories(json_data.get("categories", [])) if "categories" in json_data else None
-        annotations_dict = map_annotations_to_image_names(json_data, id_to_idx)  # check lookup is a good name?
+        annotations_dict = organize_annotations_by_image(json_data, id_to_idx)  # check lookup is a good name?
         image_info_dict = {img['id']: img for img in json_data["images"]}
         return annotations_dict, image_info_dict, image_name_to_id_dict
 
 
-def map_annotations_to_image_names(
+def organize_annotations_by_image(
         json_data: Dict[str, Any],
         category_id_to_idx: Optional[Dict[int, int]],
-) -> dict[str, list[dict]]:
+) -> dict[int, list[dict]]:
     """
-    Returns a dict mapping image file names to a list of all corresponding annotations.
+    Returns a dict mapping image id to a list of all corresponding annotations.
+
+    Annotations with "iscrowd" set to True, are excluded. Image id is the `int`
+    `image_id` in the corresponding annotation dict stored in the
+    COCO formatted .json file.
+
     Args:
         json_data: Data read from a COCO json file.
         category_id_to_idx: For COCO dataset, a dict mapping from category_id
-            to (category_id - 1).  # TODO: depricate?
+            to (category_id - 1).
     Returns:
-        image_name_to_annotation_dict_list: A dictionary where keys are image IDs
+        image_id_to_annotation_dict_list: A dictionary where keys are image ids
             and values are lists of annotation dictionaries.
-            Annotations with "iscrowd" set to True, are excluded.
     """
     image_id_to_annotation_dict_list = {}
     for annotation_dict in json_data["annotations"]:
diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index 7a9e7941..f752913d 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -160,10 +160,21 @@ def collect_prediction(predict_json: List, local_rank: int) -> List:
     return predict_json
 
 
-def predicts_to_json(image_ids, predicts, rev_tensor):
+def predicts_to_json(
+        image_ids:tuple[int],
+        predicts:list[Tensor],
+        rev_tensor:Tensor
+) -> list[dict[str, any]]:
     """
-    TODO: function document
-    turn a batch of imagepath and predicts(n x 6 for each image) to a List of diction(Detection output)
+    Returns a list of prediction dictionaries. Each dict contains, image_id,
+    category_id, bbox and score.
+
+    Args:
+        image_ids: Image ids obtained from COCO formatted .json files.
+        predicts: For each iamge, contains a tensor of shape (n, 6),
+            where n is the number of detected bbox in the corresponding image.
+        rev_tensor: A tensor of shape (m,5), where m is the number of images.
+            TODO: add docstring of what this is.
     """
     batch_json = []
     for image_id, bboxes, box_reverse in zip(image_ids, predicts, rev_tensor):

From db6a63dc1bb92e81cf5c7a128a55f820c39692d9 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Sun, 18 Aug 2024 22:19:12 -0400
Subject: [PATCH 6/7] docs: minor docstring fix.

---
 yolo/utils/model_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index f752913d..834ab7b0 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -161,7 +161,7 @@ def collect_prediction(predict_json: List, local_rank: int) -> List:
 
 
 def predicts_to_json(
-        image_ids:tuple[int],
+        image_ids:Union[tuple[int], tuple[str]],
         predicts:list[Tensor],
         rev_tensor:Tensor
 ) -> list[dict[str, any]]:
@@ -170,7 +170,9 @@ def predicts_to_json(
     category_id, bbox and score.
 
     Args:
-        image_ids: Image ids obtained from COCO formatted .json files.
+        image_ids: Tuple of image ids.
+            When using a COCO .json annotation file, image ids are int.
+            When using YOLO .txt annotation files, image ids are string. 
         predicts: For each iamge, contains a tensor of shape (n, 6),
             where n is the number of detected bbox in the corresponding image.
         rev_tensor: A tensor of shape (m,5), where m is the number of images.

From 9ffafc9ab5d1d7192b5daec369ee240523ef0aa0 Mon Sep 17 00:00:00 2001
From: Abdul-Mukit <abdul.mukit64@gmail.com>
Date: Sun, 18 Aug 2024 22:39:30 -0400
Subject: [PATCH 7/7] docs: docstring update.

---
 yolo/tools/data_loader.py   | 10 ++++++++--
 yolo/utils/dataset_utils.py |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 44179c92..48288d69 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -115,12 +115,18 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
 
-    def load_valid_labels(self, image_id: str, seg_data_one_img: list) -> Union[Tensor, None]:
+    def load_valid_labels(
+        self,
+        image_id: Union[int, str],
+        seg_data_one_img: list
+    ) -> Union[Tensor, None]:
         """
         Loads and validates bounding box data is [0, 1] from a label file.
 
         Parameters:
-            label_path (str): The filepath to the label file containing bounding box data.
+            image_id (int | str): Image id.
+            If COCO .json file is used, image id is a `int`.
+            If YOLO .txt file is used, image id is a string.
 
         Returns:
             Tensor or None: A tensor of all valid bounding boxes if any are found; otherwise, None.
diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index 9d6bb47f..0e02ef7f 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -41,7 +41,7 @@ def create_image_metadata(
         labels_path: str
 ) -> Tuple[Dict[int, List], Dict[int, Dict], Dict[str, int]]:
     """
-    Returnes three dictionaries mapping image id to list of annotations, 
+    Returns three dictionaries mapping image id to list of annotations, 
     image id to image information, and image name to image id.
     Image id is the `int` `id` assigned to a image in the COCO formatted .json file.