Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/labelformat/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
YOLOv26ObjectDetectionInput,
YOLOv26ObjectDetectionOutput,
)
from labelformat.formats.youtubevis import YouTubeVISObjectDetectionTrackInput

__all__ = [
"COCOInstanceSegmentationInput",
Expand Down Expand Up @@ -105,4 +106,5 @@
"YOLOv26ObjectDetectionInput",
"YOLOv26ObjectDetectionOutput",
"MaskPairInstanceSegmentationInput",
"YouTubeVISObjectDetectionTrackInput",
]
93 changes: 93 additions & 0 deletions src/labelformat/formats/youtubevis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import json
from argparse import ArgumentParser
from pathlib import Path
from typing import Dict, Iterable, List

from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat
from labelformat.model.category import Category
from labelformat.model.object_detection_track import (
ObjectDetectionTrackInput,
SingleObjectDetectionTrack,
VideoObjectDetectionTrack,
)
from labelformat.model.video import Video
from labelformat.types import JsonDict


class YouTubeVISObjectDetectionTrackInput(ObjectDetectionTrackInput):
    """Object detection track input read from a YouTube-VIS JSON file.

    The file is parsed once in the constructor; the ``get_*`` methods read
    from the parsed ``"categories"``, ``"videos"`` and ``"annotations"``
    lists.
    """

    @staticmethod
    def add_cli_arguments(parser: ArgumentParser) -> None:
        """Register the required ``--input-file`` option on ``parser``."""
        parser.add_argument(
            "--input-file",
            type=Path,
            required=True,
            help="Path to input YouTube-VIS JSON file",
        )

    def __init__(self, input_file: Path) -> None:
        """Open ``input_file`` and parse its JSON content."""
        with input_file.open() as file:
            self._data = json.load(file)

    def get_categories(self) -> Iterable[Category]:
        """Yield one ``Category`` per entry of the ``"categories"`` list."""
        for category in self._data["categories"]:
            yield Category(
                id=category["id"],
                name=category["name"],
            )

    def get_videos(self) -> Iterable[Video]:
        """Yield one ``Video`` per entry of the ``"videos"`` list.

        The filename is the name of the parent directory of the first path
        in ``"file_names"``; width, height and length are converted to int.
        """
        for video in self._data["videos"]:
            yield Video(
                id=video["id"],
                # TODO (Jonas, 1/2026): The file_names do not hold the video file extension. Solution required.
                filename=Path(video["file_names"][0]).parent.name,
                width=int(video["width"]),
                height=int(video["height"]),
                number_of_frames=int(video["length"]),
            )

    def get_labels(self) -> Iterable[VideoObjectDetectionTrack]:
        """Yield one ``VideoObjectDetectionTrack`` per video.

        Annotations are grouped by their ``"video_id"``. A video without any
        annotation is still yielded, with an empty ``objects`` list.

        Raises:
            KeyError: If an annotation refers to a ``"video_id"`` or
                ``"category_id"`` that is not declared in the file.
        """
        video_id_to_video = {video.id: video for video in self.get_videos()}
        category_id_to_category = {
            category.id: category for category in self.get_categories()
        }
        # Pre-seed every video so videos without annotations are kept.
        video_id_to_tracks: Dict[int, List[JsonDict]] = {
            video_id: [] for video_id in video_id_to_video
        }
        for ann in self._data["annotations"]:
            video_id_to_tracks[ann["video_id"]].append(ann)

        for video_id, tracks in video_id_to_tracks.items():
            objects = []
            for track in tracks:
                boxes = _get_object_track_boxes(ann=track)
                objects.append(
                    SingleObjectDetectionTrack(
                        # Look up the category of this track, not of the
                        # leftover variable from the grouping loop above.
                        category=category_id_to_category[track["category_id"]],
                        boxes=boxes,
                    )
                )
            yield VideoObjectDetectionTrack(
                video=video_id_to_video[video_id],
                objects=objects,
            )


def _get_object_track_boxes(
    ann: JsonDict,
) -> list[BoundingBox | None]:
    """Convert the ``"bboxes"`` entries of one annotation into boxes.

    Each entry that is ``None`` or empty maps to ``None``. Every other entry
    has its values cast to float and is read as an XYWH bounding box. The
    result has one item per entry, in the original order.
    """
    return [
        None
        if raw is None or len(raw) == 0
        else BoundingBox.from_format(
            bbox=[float(value) for value in raw],
            format=BoundingBoxFormat.XYWH,
        )
        for raw in ann["bboxes"]
    ]
92 changes: 92 additions & 0 deletions tests/unit/formats/test_youtubevis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import json
from pathlib import Path

from labelformat.formats.youtubevis import YouTubeVISObjectDetectionTrackInput
from labelformat.model.bounding_box import BoundingBox
from labelformat.model.category import Category
from labelformat.model.object_detection_track import (
SingleObjectDetectionTrack,
VideoObjectDetectionTrack,
)
from labelformat.model.video import Video


class TestYouTubeVISObjectDetectionTrackInput:
    """Checks the reader against the fixture from ``_write_youtube_vis_json``."""

    def test_get_categories(self, tmp_path: Path) -> None:
        """The single fixture category is returned."""
        label_input = YouTubeVISObjectDetectionTrackInput(
            input_file=_write_youtube_vis_json(tmp_path / "instances.json")
        )

        categories = list(label_input.get_categories())

        assert categories == [Category(id=1, name="cat")]

    def test_get_videos(self, tmp_path: Path) -> None:
        """The single fixture video is returned with its directory as filename."""
        label_input = YouTubeVISObjectDetectionTrackInput(
            input_file=_write_youtube_vis_json(tmp_path / "instances.json")
        )
        expected_video = Video(
            id=5,
            filename="video1",
            width=640,
            height=480,
            number_of_frames=2,
        )

        videos = list(label_input.get_videos())

        assert videos == [expected_video]

    def test_get_labels(self, tmp_path: Path) -> None:
        """One track with one XYWH box and one missing box is returned."""
        label_input = YouTubeVISObjectDetectionTrackInput(
            input_file=_write_youtube_vis_json(tmp_path / "instances.json")
        )
        expected_video = Video(
            id=5,
            filename="video1",
            width=640,
            height=480,
            number_of_frames=2,
        )
        # XYWH [10, 20, 30, 40] from the fixture corresponds to these corners.
        expected_box = BoundingBox(
            xmin=10.0,
            ymin=20.0,
            xmax=40.0,
            ymax=60.0,
        )
        expected_track = SingleObjectDetectionTrack(
            category=Category(id=1, name="cat"),
            boxes=[expected_box, None],
        )

        labels = list(label_input.get_labels())

        assert labels == [
            VideoObjectDetectionTrack(
                video=expected_video,
                objects=[expected_track],
            )
        ]


def _write_youtube_vis_json(input_file: Path) -> Path:
    """Write a minimal YouTube-VIS style JSON fixture and return its path.

    The fixture holds one category, one two-frame video and one annotation
    whose second box is missing.
    """
    category = {"id": 1, "name": "cat"}
    video = {
        "id": 5,
        "file_names": ["video1/00000.jpg", "video1/00001.jpg"],
        "width": 640,
        "height": 480,
        "length": 2,
    }
    annotation = {
        "video_id": 5,
        "category_id": 1,
        "bboxes": [
            [10.0, 20.0, 30.0, 40.0],
            None,
        ],
    }
    payload = {
        "categories": [category],
        "videos": [video],
        "annotations": [annotation],
    }
    serialized = json.dumps(payload)
    input_file.write_text(serialized)
    return input_file