From a6802e15214a592cdb67a00482ab226f9da20154 Mon Sep 17 00:00:00 2001
From: AkashWudali12 <akash.wudali@gmail.com>
Date: Tue, 21 Jan 2025 16:34:15 -0500
Subject: [PATCH 1/2] fix error where there are no variance_laplacians,
 causing numpy error

---
 Katna/image_selector.py | 11 ++++++++---
 Katna/video.py          |  6 ++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/Katna/image_selector.py b/Katna/image_selector.py
index 09f40a6..73d05c1 100644
--- a/Katna/image_selector.py
+++ b/Katna/image_selector.py
@@ -229,9 +229,14 @@ def __get_best_images_index_from_each_cluster__(
             n_images = np.arange(len(curr_row))
             variance_laplacians = self.__get_laplacian_scores(files, n_images)
 
-            # Selecting image with low burr(high laplacian) score
-            selected_frame_of_current_cluster = curr_row[np.argmax(variance_laplacians)]
-            filtered_items.append(selected_frame_of_current_cluster)
+            if len(variance_laplacians) > 0:
+                # Selecting image with low blur (high laplacian) score
+                selected_frame_of_current_cluster = curr_row[np.argmax(variance_laplacians)]
+                filtered_items.append(selected_frame_of_current_cluster)
+            
+            else:
+                # temporary catch for situation where there are no variance laplacians
+                print("NO VARIANCE LAPLACIANS, downstream issue, ignore for now")
 
         return filtered_items
 
diff --git a/Katna/video.py b/Katna/video.py
index 80b114d..6c6f059 100644
--- a/Katna/video.py
+++ b/Katna/video.py
@@ -174,7 +174,7 @@ def _extract_keyframes_from_video(self, no_of_frames, file_path):
         chunked_videos = self._split(file_path)
         frame_extractor = FrameExtractor()
 
-        # Passing all the clipped videos for  the frame extraction using map function of the
+        # Passing all the clipped videos for the frame extraction using map function of the
         # multiprocessing pool
         with self.pool_extractor:
             extracted_candidate_frames = self.pool_extractor.map(
@@ -320,7 +320,9 @@ def extract_video_keyframes(self, no_of_frames, file_path, writer):
 
         writer.write(file_path, top_frames)
         print("Completed processing for : ", file_path)
-
+        
+        # returning top frames for processing by caller
+        return top_frames
     def _split_large_video(self, file_path):
         """
         Splits large video file into smaller videos (based on conf) so they don't take up memory

From 8cbd586f63428a7fbf75ee7a89478dda04f062fe Mon Sep 17 00:00:00 2001
From: AkashWudali12 <akash.wudali@gmail.com>
Date: Wed, 22 Jan 2025 17:19:44 -0500
Subject: [PATCH 2/2] added methods to get timestamps and time ranges of
 keyframes, added filepath list to KeyFrameDiskWriter class

---
 Katna/frame_extractor.py | 132 ++++++--
 Katna/image_selector.py  |  69 +++++
 Katna/video.py           | 638 +++++++++++++++++++++++++--------------
 Katna/writer.py          |   3 +-
 4 files changed, 599 insertions(+), 243 deletions(-)

diff --git a/Katna/frame_extractor.py b/Katna/frame_extractor.py
index 93e4aa6..068af1e 100644
--- a/Katna/frame_extractor.py
+++ b/Katna/frame_extractor.py
@@ -95,14 +95,12 @@ def __extract_all_frames_from_video__(self, videopath):
 
             frame_diffs = []
             frames = []
+            
             for _ in range(0, self.max_frames_in_chunk):
                 if ret:
-                    # Calling process frame function to calculate the frame difference and adding the difference 
-                    # in **frame_diffs** list and frame to **frames** list
                     prev_frame, curr_frame = self.__process_frame(frame, prev_frame, frame_diffs, frames)
                     i = i + 1
                     ret, frame = cap.read()
-                    # print(frame_count)
                 else:
                     cap.release()
                     break
@@ -110,18 +108,59 @@ def __extract_all_frames_from_video__(self, videopath):
             yield frames, frame_diffs
         cap.release()
 
+    def __extract_all_frames_from_video_with_time__(self, videopath, chunk_info):
+        """Generator function for extracting frames from an input video which are sufficiently different from each other,
+        and return result back as list of opencv images in memory along with their timestamps
+
+        :param videopath: inputvideo path
+        :type videopath: `str`
+        :param chunk_info: list of chunk timing info
+        :type chunk_info: list of tuples (filepath, start_time, end_time)
+        :return: Generator with extracted frames in max_process_frames chunks, difference between frames, and timestamps
+        :rtype: generator object with content of type [numpy.ndarray, numpy.ndarray, list] 
+        """
+        cap = cv2.VideoCapture(str(videopath))
+
+        ret, frame = cap.read()
+        i = 1
+        chunk_no = 0
+        
+        while ret:
+            curr_frame = None
+            prev_frame = None
+
+            frame_diffs = []
+            frames = []
+            timestamps = []  # List to store timestamps
+            
+            for _ in range(0, self.max_frames_in_chunk):
+                if ret:
+                    # Get timestamp in seconds
+                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
+                    timestamps.append((chunk_info[1] + timestamp, chunk_info[1], chunk_info[2]))
+                    
+                    # Process frame and calculate differences
+                    prev_frame, curr_frame = self.__process_frame(frame, prev_frame, frame_diffs, frames)
+                    i = i + 1
+                    ret, frame = cap.read()
+                else:
+                    cap.release()
+                    break
+            chunk_no = chunk_no + 1
+            yield frames, frame_diffs, timestamps
+        cap.release()
+
     def __get_frames_in_local_maxima__(self, frames, frame_diffs):
         """ Internal function for getting local maxima of key frames 
         This functions Returns one single image with strongest change from its vicinity of frames 
         ( vicinity defined using window length ) 
 
-        :param object: base class inheritance
-        :type object: class:`Object`
         :param frames: list of frames to do local maxima on
         :type frames: `list of images`
         :param frame_diffs: list of frame difference values 
         :type frame_diffs: `list of images`
-
+        :return: list of extracted key frames
+        :rtype: list
         """
         extracted_key_frames = []
         diff_array = np.array(frame_diffs)
@@ -139,6 +178,38 @@ def __get_frames_in_local_maxima__(self, frames, frame_diffs):
         del frame_diffs[:]
         return extracted_key_frames
 
+    def __get_frames_in_local_maxima_with_time__(self, frames, frame_diffs, timestamps):
+        """ Internal function for getting local maxima of key frames with timestamps
+        This function returns the frames with the strongest change within their vicinity of frames
+        ( vicinity defined using window length ), along with the matching timestamp entries
+
+        :param frames: list of frames to do local maxima on
+        :type frames: `list of images`
+        :param frame_diffs: list of frame difference values 
+        :type frame_diffs: `list of images`
+        :param timestamps: list of timestamps
+        :type timestamps: list of tuples (timestamp, chunk_start, chunk_end)
+        :return: tuple of (list of extracted key frames, list of timestamps)
+        :rtype: tuple(list, list)
+        """
+        extracted_key_frames = []
+        extracted_timestamps = []
+        diff_array = np.array(frame_diffs)
+        # Normalizing the frame differences based on windows parameters
+        sm_diff_array = self.__smooth__(diff_array, self.len_window)
+
+        # Get the indexes of those frames which have maximum differences
+        frame_indexes = np.asarray(argrelextrema(sm_diff_array, np.greater))[0]
+
+        for frame_index in frame_indexes:
+            extracted_key_frames.append(frames[frame_index - 1])
+            extracted_timestamps.append(timestamps[frame_index - 1])
+        del frames[:]
+        del sm_diff_array
+        del diff_array
+        del frame_diffs[:]
+        return extracted_key_frames, extracted_timestamps
+
     def __smooth__(self, x, window_len, window=config.FrameExtractor.window_type):
         """smooth the data using a window with requested size.
         This method is based on the convolution of a scaled window with the signal.
@@ -188,36 +259,59 @@ def __smooth__(self, x, window_len, window=config.FrameExtractor.window_type):
         return y[window_len - 1 : -window_len + 1]
 
     def extract_candidate_frames(self, videopath):
-        """ Pubic function for this module , Given and input video path
+        """ Public function for this module , Given an input video path
         This functions Returns one list of all candidate key-frames  
 
-        :param object: base class inheritance
-        :type object: class:`Object`
         :param videopath: inputvideo path
         :type videopath: `str`
-        :return: opencv.Image.Image objects
+        :return: list of opencv.Image.Image objects
         :rtype: list
         """
-
         extracted_candidate_key_frames = []
 
         # Get all frames from video in chunks using python Generators
-        frame_extractor_from_video_generator = self.__extract_all_frames_from_video__(
-            videopath
-        )
+        frame_extractor_from_video_generator = self.__extract_all_frames_from_video__(videopath)
 
         # Loop over every frame in the frame extractor generator object and calculate the
         # local maxima of frames 
         for frames, frame_diffs in frame_extractor_from_video_generator:
-            extracted_candidate_key_frames_chunk = []
             if self.USE_LOCAL_MAXIMA:
-
                 # Getting the frame with maximum frame difference
                 extracted_candidate_key_frames_chunk = self.__get_frames_in_local_maxima__(
                     frames, frame_diffs
                 )
-                extracted_candidate_key_frames.extend(
-                    extracted_candidate_key_frames_chunk
-                )
+                extracted_candidate_key_frames.extend(extracted_candidate_key_frames_chunk)
 
         return extracted_candidate_key_frames
+
+    def extract_candidate_frames_with_time(self, args):
+        """ Public function for this module , Given an input video path and chunk info
+        This function returns a list of all candidate key-frames and a list of their timestamps
+
+        :param args: tuple of (videopath, chunk_info)
+        :type args: tuple(str, list)
+        :return: tuple of (list of opencv.Image.Image objects, list of timestamps in seconds)
+        :rtype: tuple(list, list)
+        """
+        videopath, chunk_info = args
+        
+        extracted_candidate_key_frames = []
+        extracted_timestamps = []
+
+        # Get all frames from video in chunks using python Generators
+        frame_extractor_from_video_generator = self.__extract_all_frames_from_video_with_time__(
+            videopath, chunk_info
+        )
+
+        # Loop over every frame in the frame extractor generator object and calculate the
+        # local maxima of frames 
+        for frames, frame_diffs, timestamps in frame_extractor_from_video_generator:
+            if self.USE_LOCAL_MAXIMA:
+                # Getting the frame with maximum frame difference
+                extracted_candidate_key_frames_chunk, extracted_timestamps_chunk = self.__get_frames_in_local_maxima_with_time__(
+                    frames, frame_diffs, timestamps
+                )
+                extracted_candidate_key_frames.extend(extracted_candidate_key_frames_chunk)
+                extracted_timestamps.extend(extracted_timestamps_chunk)
+
+        return extracted_candidate_key_frames, extracted_timestamps
diff --git a/Katna/image_selector.py b/Katna/image_selector.py
index 73d05c1..6b4eb29 100644
--- a/Katna/image_selector.py
+++ b/Katna/image_selector.py
@@ -54,6 +54,8 @@ def __get_brightness_score__(self, image):
         :return: result of Brightness measurment 
         :rtype: float value between 0.0 to 100.0    
         """
+        if len(image.shape) == 2:  # If image is grayscale
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
         hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
         _, _, v = cv2.split(hsv)
         sum = np.sum(v, dtype=np.float32)
@@ -252,6 +254,73 @@ def __get_best_images_index_from_each_cluster__(
     #     """
     #     self.__dict__.update(state)
 
+    def select_best_frames_with_time(self, input_key_frames, number_of_frames, input_timestamps):
+        """[summary] Public function for Image selector class: takes list of key-frames images and number of required
+        frames plus a list of timestamps as input, returns the filtered keyframes and a parallel list of timestamps
+
+        :param object: base class inheritance
+        :type object: class:`Object`
+        :param input_key_frames: list of input keyframes in list of opencv image format 
+        :type input_key_frames: python list opencv images
+        :param number_of_frames: Required number of images 
+        :type number_of_frames: int
+        :param input_timestamps: list of timestamps for each keyframe
+        :type input_timestamps: python list of timestamps
+        :return: Returns tuple of (list of filtered images, parallel list of timestamps)
+        :rtype: tuple(list, list)
+        """
+
+        self.nb_clusters = number_of_frames
+
+        filtered_key_frames = []
+        filtered_images_list = []   
+        filtered_timestamps = []
+        # Widen the brightness/entropy thresholds step by step until enough frames pass the filter
+        min_brightness_values = np.arange(config.ImageSelector.min_brightness_value, -0.01, -self.brightness_step)
+        max_brightness_values = np.arange(config.ImageSelector.max_brightness_value, 100.01, self.brightness_step)
+        min_entropy_values = np.arange(config.ImageSelector.min_entropy_value, -0.01, -self.entropy_step)
+        max_entropy_values = np.arange(config.ImageSelector.max_entropy_value, 10.01, self.entropy_step)
+        
+        for (min_brightness_value, max_brightness_value, min_entropy_value, max_entropy_value) in itertools.zip_longest(min_brightness_values, max_brightness_values, min_entropy_values, max_entropy_values): 
+            if min_brightness_value is None:
+                min_brightness_value = 0.0
+            if max_brightness_value is None:
+                max_brightness_value = 100.0
+            if min_entropy_value is None:
+                min_entropy_value = 0.0
+            if max_entropy_value is None:
+                max_entropy_value = 10.0
+            self.min_brightness_value = min_brightness_value
+            self.max_brightness_value = max_brightness_value
+            self.min_entropy_value = min_entropy_value
+            self.max_entropy_value = max_entropy_value
+            filtered_key_frames = self.__filter_optimum_brightness_and_contrast_images__(
+                input_key_frames, 
+            )
+            if len(filtered_key_frames) >= number_of_frames:
+                break
+
+        # Selecting the best images from each cluster by first preparing the clusters on basis of histograms 
+        # and then selecting the best images from every cluster
+        if len(filtered_key_frames) >= self.nb_clusters:
+            files_clusters_index_array = self.__prepare_cluster_sets__(filtered_key_frames)
+            selected_images_index = self.__get_best_images_index_from_each_cluster__(
+                filtered_key_frames, files_clusters_index_array
+            )
+
+            for index in selected_images_index:
+                img = filtered_key_frames[index]
+                filtered_images_list.append(img)
+                filtered_timestamps.append(input_timestamps[index])
+        else:
+            # if fewer frames passed the filter than the requested number of key-frames, return all of them
+            for i, img in enumerate(filtered_key_frames):
+                filtered_images_list.append(img)
+                filtered_timestamps.append(input_timestamps[i])
+        return filtered_images_list, filtered_timestamps
+
+
+
     def select_best_frames(self, input_key_frames, number_of_frames):
         """[summary] Public function for Image selector class: takes list of key-frames images and number of required
         frames as input, returns list of filtered keyframes
diff --git a/Katna/video.py b/Katna/video.py
index 6c6f059..c8b2a4b 100644
--- a/Katna/video.py
+++ b/Katna/video.py
@@ -31,7 +31,6 @@
 import functools
 import operator
 
-
 class Video(object):
     """Class for all video frames operations
 
@@ -158,29 +157,26 @@ def resize_video_from_dir(self, dir_path, abs_dir_path_output, aspect_ratio):
     def _extract_keyframes_from_video(self, no_of_frames, file_path):
         """Core method to extract keyframe for a video
 
-        :param no_of_frames: [description]
-        :type no_of_frames: [type]
-        :param file_path: [description]
-        :type file_path: [type]
+        :param no_of_frames: number of keyframes to extract
+        :type no_of_frames: int
+        :param file_path: path to video file
+        :type file_path: str
+        :return: list of keyframes
+        :rtype: list
         """
-        # Creating the multiprocessing pool
         self.pool_extractor = Pool(processes=self.n_processes)
-        # Split the input video into chunks. Each split(video) will be stored
-        # in a temp
+        
         if not helper._check_if_valid_video(file_path):
             raise Exception("Invalid or corrupted video: " + file_path)
 
-        # split videos in chunks in smaller chunks for parallel processing.
         chunked_videos = self._split(file_path)
         frame_extractor = FrameExtractor()
 
-        # Passing all the clipped videos for the frame extraction using map function of the
-        # multiprocessing pool
+        extracted_candidate_frames = []
         with self.pool_extractor:
-            extracted_candidate_frames = self.pool_extractor.map(
-                frame_extractor.extract_candidate_frames, chunked_videos
-            )
-        # Converting the nested list of extracted frames into 1D list
+            results = self.pool_extractor.map(frame_extractor.extract_candidate_frames, chunked_videos)
+            extracted_candidate_frames.extend(results)
+
         extracted_candidate_frames = functools.reduce(operator.iconcat, extracted_candidate_frames, [])
 
         self._remove_clips(chunked_videos)
@@ -191,9 +187,53 @@ def _extract_keyframes_from_video(self, no_of_frames, file_path):
         )
 
         del extracted_candidate_frames
-
         return top_frames
 
+    def _extract_keyframes_from_video_with_time(self, no_of_frames, file_path, chunk_timing):
+        """Core method to extract keyframe for a video with timestamps
+
+        :param no_of_frames: number of keyframes to extract
+        :type no_of_frames: int
+        :param file_path: path to video file
+        :type file_path: str
+        :param chunk_timing: tuple of (start_time, clip_start, clip_end)
+        :type chunk_timing: tuple(float, float, float)
+        :return: tuple of (list of keyframes, list of timestamps)
+        :rtype: tuple(list, list)
+        """
+        self.pool_extractor = Pool(processes=self.n_processes)
+        
+        if not helper._check_if_valid_video(file_path):
+            raise Exception("Invalid or corrupted video: " + file_path)
+
+        chunked_videos, chunk_info = self._split_with_time(file_path, chunk_timing)
+        video_chunks_with_info = list(zip(chunked_videos, chunk_info))
+
+        frame_extractor = FrameExtractor()
+
+        extracted_candidate_frames = []
+        extracted_timestamps = []
+
+        with self.pool_extractor:
+            results = self.pool_extractor.map(frame_extractor.extract_candidate_frames_with_time, video_chunks_with_info)
+            extracted_candidate_frames.extend([result[0] for result in results])
+            extracted_timestamps.extend([result[1] for result in results])
+
+        extracted_candidate_frames = functools.reduce(operator.iconcat, extracted_candidate_frames, [])
+        extracted_timestamps = functools.reduce(operator.iconcat, extracted_timestamps, [])
+
+        self._remove_clips(chunked_videos)
+        image_selector = ImageSelector(self.n_processes)
+
+        top_frames, top_timestamps = image_selector.select_best_frames_with_time(
+            extracted_candidate_frames, no_of_frames, extracted_timestamps
+        )
+
+        del extracted_candidate_frames
+        del extracted_timestamps
+
+        return top_frames, top_timestamps
+
     def _extract_keyframes_for_files_iterator(self, no_of_frames, list_of_filepaths):
         """Extract desirable number of keyframes for files in the list of filepaths.
 
@@ -257,17 +297,70 @@ def extract_keyframes_from_videos_dir(self, no_of_frames, dir_path, writer):
         else:
             print("All the files in directory %s are invalid video files" % dir_path)
 
-    def extract_video_keyframes_big_video(self, no_of_frames, file_path):
+    def extract_video_keyframes(self, no_of_frames, file_path, writer):
+        """Returns a list of best key images/frames from a single video.
+
+        :param no_of_frames: Number of key frames to be extracted
+        :type no_of_frames: int, required
+        :param file_path: video file location
+        :type file_path: str, required
+        :param writer: Writer object to process keyframe data
+        :type writer: Writer, required
+        :return: List of numpy.2darray Image objects
+        :rtype: list
         """
+        # get the video duration
+        video_duration = self._get_video_duration_with_cv(file_path)
 
-        :param no_of_frames:
-        :type no_of_frames:
-        :param file_path:
-        :type file_path:
-        :return:
-        :rtype:
+        # duration is in seconds
+        if video_duration > (config.Video.video_split_threshold_in_minutes * 60):
+            print("Large Video (duration = %s min), will split into smaller videos " % round(video_duration / 60))
+            top_frames = self.extract_video_keyframes_big_video(no_of_frames, file_path)
+        else:
+            top_frames = self._extract_keyframes_from_video(no_of_frames, file_path)
+
+        writer.write(file_path, top_frames)
+        print("Completed processing for : ", file_path)
+        
+        return top_frames
+
+    def extract_video_keyframes_with_time(self, no_of_frames, file_path, writer):
+        """Returns a list of best key images/frames from a single video with their timestamps.
+
+        :param no_of_frames: Number of key frames to be extracted
+        :type no_of_frames: int, required
+        :param file_path: video file location
+        :type file_path: str, required
+        :param writer: Writer object to process keyframe data
+        :type writer: Writer, required
+        :return: Tuple of (List of numpy.2darray Image objects, List of timestamps)
+        :rtype: tuple(list, list)
         """
+        # get the video duration
+        video_duration = self._get_video_duration_with_cv(file_path)
+
+        # duration is in seconds
+        if video_duration > (config.Video.video_split_threshold_in_minutes * 60):
+            print("Large Video (duration = %s min), will split into smaller videos " % round(video_duration / 60))
+            top_frames, top_timestamps = self.extract_video_keyframes_big_video_with_time(no_of_frames, file_path)
+        else:
+            top_frames, top_timestamps = self._extract_keyframes_from_video_with_time(no_of_frames, file_path)
 
+        writer.write(file_path, top_frames)
+        print("Completed processing for : ", file_path)
+        
+        return top_frames, top_timestamps
+
+    def extract_video_keyframes_big_video(self, no_of_frames, file_path):
+        """Extract keyframes from a large video by splitting it into chunks
+
+        :param no_of_frames: number of frames to extract
+        :type no_of_frames: int
+        :param file_path: path to video file
+        :type file_path: str
+        :return: list of keyframes
+        :rtype: list
+        """
         # split the videos with break point at 20 min
         video_splits = self._split_large_video(file_path)
         print("Video split complete.")
@@ -276,7 +369,7 @@ def extract_video_keyframes_big_video(self, no_of_frames, file_path):
 
         # call _extract_keyframes_from_video
         for split_video_file_path in video_splits:
-            # print("Processing split : ", split_video_file_path)
+            print("Processing split video: ", split_video_file_path)
             top_frames_split = self._extract_keyframes_from_video(no_of_frames, split_video_file_path)
             all_top_frames_split.append(top_frames_split)
 
@@ -293,6 +386,46 @@ def extract_video_keyframes_big_video(self, no_of_frames, file_path):
         )
 
         return top_frames
+    
+    def extract_video_keyframes_big_video_with_time(self, no_of_frames, file_path):
+        """Extract keyframes from a large video by splitting it into chunks, with timestamps
+
+        :param no_of_frames: number of frames to extract
+        :type no_of_frames: int
+        :param file_path: path to video file
+        :type file_path: str
+        :return: tuple of (list of keyframes, list of timestamps)
+        :rtype: tuple(list, list)
+        """
+        # split the large video into smaller clips and collect the per-clip timing info
+        video_splits, chunk_info = self._split_large_video_with_time(file_path)
+        print("Video split complete.")
+
+        all_top_frames_split = []
+        all_timestamps_split = []
+
+        # call _extract_keyframes_from_video_with_time
+        for split_video_file_path, chunk_timing in zip(video_splits, chunk_info):
+            top_frames_split, timestamps_split = self._extract_keyframes_from_video_with_time(
+                no_of_frames, split_video_file_path, chunk_timing
+            )
+            all_top_frames_split.append(top_frames_split)
+            all_timestamps_split.append(timestamps_split)
+
+        # collect and merge keyframes to get no_of_frames
+        self._remove_clips(video_splits)
+        image_selector = ImageSelector(self.n_processes)
+
+        # list of list to 1d list
+        extracted_candidate_frames = functools.reduce(operator.iconcat, all_top_frames_split, [])
+        extracted_timestamps = functools.reduce(operator.iconcat, all_timestamps_split, [])
+
+        # top frames
+        top_frames, top_timestamps = image_selector.select_best_frames_with_time(
+            extracted_candidate_frames, no_of_frames, extracted_timestamps
+        )
+
+        return top_frames, top_timestamps
 
     @FileDecorators.validate_file_path
     def extract_video_keyframes(self, no_of_frames, file_path, writer):
@@ -323,49 +456,53 @@ def extract_video_keyframes(self, no_of_frames, file_path, writer):
         
         # returning top frames for processing by caller
         return top_frames
-    def _split_large_video(self, file_path):
-        """
-        Splits large video file into smaller videos (based on conf) so they don't take up memory
-        :param file_path: path of video file
-        :type file_path: str, required
-        :return: List of path of splitted video clips
-        :rtype: list
-        """
 
-        break_duration_in_sec = config.Video.video_split_threshold_in_minutes * 60
-
-        # get the size of the frame
-        video_info = helper.get_video_info(file_path)
-        frame_size_in_bytes = video_info[0]
-        fps = video_info[1]
-
-        # get the available space
-        free_space_in_bytes = psutil.virtual_memory().available
+    def _split(self, file_path):
+        chunked_videos = self._split_with_ffmpeg(file_path)
+        corruption_in_chunked_videos = False
+        for chunked_video in chunked_videos:
+            if not helper._check_if_valid_video(chunked_video):
+                corruption_in_chunked_videos = True
 
-        # based on config calculate available memory
-        available_memory = config.Video.memory_consumption_threshold * free_space_in_bytes
+        if corruption_in_chunked_videos:
+            chunked_videos = self._split_with_ffmpeg(file_path, override_video_codec=True)
+            for chunked_video in chunked_videos:
+                if not helper._check_if_valid_video(chunked_video):
+                    raise Exception(
+                        "Error in splitting videos in multiple chunks, corrupted video chunk: "
+                        + chunked_video
+                    )
 
-        # seconds to reach threshold if all frames are collected, but not all are candidate frames
-        # so we can easily multiple this number with a constant
-        no_of_sec_to_reach_threshold = (available_memory / (fps * frame_size_in_bytes)) * config.Video.assumed_no_of_frames_per_candidate_frame
+        return chunked_videos
 
-        if break_duration_in_sec > no_of_sec_to_reach_threshold:
-            break_duration_in_sec = math.floor(no_of_sec_to_reach_threshold)
+    def _split_with_time(self, file_path, chunk_timing):
+        chunked_videos, chunk_info = self._split_with_ffmpeg_with_time(file_path, chunk_timing)
+        corruption_in_chunked_videos = False
+        for chunked_video in chunked_videos:
+            if not helper._check_if_valid_video(chunked_video):
+                corruption_in_chunked_videos = True
 
-        # print("Split duration for video (in min) is : ", break_duration_in_sec / 60)
+        if corruption_in_chunked_videos:
+            chunked_videos, _ = self._split_with_ffmpeg_with_time(file_path, override_video_codec=True, chunk_timing=chunk_timing)
+            for chunked_video in chunked_videos:
+                if not helper._check_if_valid_video(chunked_video):
+                    raise Exception(
+                        "Error in splitting videos in multiple chunks, corrupted video chunk: "
+                        + chunked_video
+                    )
 
-        video_splits = self._split_with_ffmpeg(file_path,
-                                               break_point_duration_in_sec=break_duration_in_sec)
+        return chunked_videos, chunk_info
 
+    def _split_large_video(self, file_path):
+        break_duration_in_sec = self._calculate_break_duration(file_path)
+        video_splits = self._split_with_ffmpeg(file_path, break_point_duration_in_sec=break_duration_in_sec)
         corruption_in_chunked_videos = False
         for chunked_video in video_splits:
             if not helper._check_if_valid_video(chunked_video):
                 corruption_in_chunked_videos = True
 
         if corruption_in_chunked_videos:
-            video_splits = self._split_with_ffmpeg(file_path,
-                                                   override_video_codec=True,
-                                                   break_point_duration_in_sec=break_duration_in_sec)
+            video_splits = self._split_with_ffmpeg(file_path, override_video_codec=True, break_point_duration_in_sec=break_duration_in_sec)
             for chunked_video in video_splits:
                 if not helper._check_if_valid_video(chunked_video):
                     raise Exception(
@@ -375,174 +512,79 @@ def _split_large_video(self, file_path):
 
         return video_splits
 
-    def _split(self, file_path):
-        """Split videos using ffmpeg library first by copying audio and
-        video codecs from input files, it leads to faster splitting, But if
-        resulting splitted videos are unreadable try again splitting by using
-        ffmpeg default codecs. If splitteed videos are still unreadable throw an
-        exception.
-
-        :param file_path: path of video file
-        :type file_path: str, required
-        :return: List of path of splitted video clips
-        :rtype: list
-        """
-        chunked_videos = self._split_with_ffmpeg(file_path)
+    def _split_large_video_with_time(self, file_path):
+        break_duration_in_sec = self._calculate_break_duration(file_path)
+        video_splits, chunk_info = self._split_with_ffmpeg_with_time(
+            file_path, 
+            break_point_duration_in_sec=break_duration_in_sec, 
+            chunk_timing=("", 0, 0)
+        )
         corruption_in_chunked_videos = False
-        for chunked_video in chunked_videos:
+        for chunked_video in video_splits:
             if not helper._check_if_valid_video(chunked_video):
                 corruption_in_chunked_videos = True
 
         if corruption_in_chunked_videos:
-            chunked_videos = self._split_with_ffmpeg(file_path, override_video_codec=True)
-            for chunked_video in chunked_videos:
+            video_splits, chunk_info = self._split_with_ffmpeg_with_time(
+                file_path, 
+                override_video_codec=True, 
+                break_point_duration_in_sec=break_duration_in_sec,
+                chunk_timing=("", 0, 0)
+            )
+            for chunked_video in video_splits:
                 if not helper._check_if_valid_video(chunked_video):
                     raise Exception(
                         "Error in splitting videos in multiple chunks, corrupted video chunk: "
                         + chunked_video
                     )
 
-        return chunked_videos
+        return video_splits, chunk_info
 
-    @FileDecorators.validate_file_path
-    def compress_video(
-        self,
-        file_path,
-        force_overwrite=False,
-        crf_parameter=config.Video.video_compression_crf_parameter,
-        output_video_codec=config.Video.video_compression_codec,
-        out_dir_path="",
-        out_file_name="",
-    ):
-        """Function to compress given input file
-
-        :param file_path: Input file path
-        :type file_path: str
-        :param force_overwrite: optional parameter if True then if there is \
-        already a file in output file location function will overwrite it, defaults to False
-        :type force_overwrite: bool, optional
-        :param crf_parameter: Constant Rate Factor Parameter for controlling \
-        amount of video compression to be applied, The range of the quantizer scale is 0-51:\
-        where 0 is lossless, 23 is default, and 51 is worst possible.\
-        It is recommend to keep this value between 20 to 30 \
-        A lower value is a higher quality, you can change default value by changing \
-        config.Video.video_compression_crf_parameter
-        :type crf_parameter: int, optional
-        :param output_video_codec: Type of video codec to choose, \
-        Currently supported options are libx264 and libx265, libx264 is default option.\
-        libx264 is more widely supported on different operating systems and platforms, \
-        libx265 uses more advanced x265 codec and results in better compression and even less \
-        output video sizes with same or better quality. Right now libx265 is not as widely compatible \
-        on older versions of MacOS and Widows by default. If wider video compatibility is your goal \
-        you should use libx264., you can change default value by changing \
-        Katna.config.Video.video_compression_codec
-        :type output_video_codec: str, optional
-        :param out_dir_path: output folder path where you want output video to be saved, defaults to ""
-        :type out_dir_path: str, optional
-        :param out_file_name: output filename, if not mentioned it will be same as input filename, defaults to ""
-        :type out_file_name: str, optional
-        :raises Exception: raises FileNotFoundError Exception if input video file not found, also exception is raised in case output video file path already exist and force_overwrite is not set to True.
-        :return: Status code Returns True if video compression was successfull else False
-        :rtype: bool
-        """
-        # TODO add docstring for exeception
-        # Add details where libx265 will make sense
-
-        if not helper._check_if_valid_video(file_path):
-            raise Exception("Invalid or corrupted video: " + file_path)
-        # Intialize video compression class
-        video_compressor = VideoCompressor()
-        # Run video compression
-        status = video_compressor.compress_video(
-            file_path,
-            force_overwrite,
-            crf_parameter,
-            output_video_codec,
-            out_dir_path,
-            out_file_name,
-        )
-        return status
-
-    @FileDecorators.validate_dir_path
-    def compress_videos_from_dir(
-        self,
-        dir_path,
-        force_overwrite=False,
-        crf_parameter=config.Video.video_compression_crf_parameter,
-        output_video_codec=config.Video.video_compression_codec,
-        out_dir_path="",
-        out_file_name="",
-    ):
-        """Function to compress input video files in a folder
+    def _split_with_ffmpeg(self, file_path, override_video_codec=False, break_point_duration_in_sec=None):
+        """Function to split the videos and persist the chunks
 
-        :param dir_path: Input folder path
-        :type dir_path: str
-        :param force_overwrite: optional parameter if True then if there is \
-        already a file in output file location function will overwrite it, defaults to False
-        :type force_overwrite: bool, optional
-        :param crf_parameter: Constant Rate Factor Parameter for controlling \
-        amount of video compression to be applied, The range of the quantizer scale is 0-51:\
-        where 0 is lossless, 23 is default, and 51 is worst possible.\
-        It is recommend to keep this value between 20 to 30 \
-        A lower value is a higher quality, you can change default value by changing \
-        config.Video.video_compression_crf_parameter
-        :type crf_parameter: int, optional
-        :param output_video_codec: Type of video codec to choose, \
-        Currently supported options are libx264 and libx265, libx264 is default option.\
-        libx264 is more widely supported on different operating systems and platforms, \
-        libx265 uses more advanced x265 codec and results in better compression and even less \
-        output video sizes with same or better quality. Right now libx265 is not as widely compatible \
-        on older versions of MacOS and Widows by default. If wider video compatibility is your goal \
-        you should use libx264., you can change default value by changing Katna.config.Video.video_compression_codec
-        :type output_video_codec: str, optional
-        :param out_dir_path: output folder path where you want output video to be saved, defaults to ""
-        :type out_dir_path: str, optional
-        :raises Exception: raises FileNotFoundError Exception if input video file not found, also exception is raised in case output video file path already exist and force_overwrite is not set to True.
-        :return: Status code Returns True if video compression was successfull else False
-        :rtype: bool
+        :param file_path: path of video file
+        :type file_path: str, required
+        :param override_video_codec: If true overrides input video codec to ffmpeg default codec else copy input video codec, defaults to False
+        :type override_video_codec: bool, optional
+        :param break_point_duration_in_sec: duration in sec for break point
+        :type break_point_duration_in_sec: int, optional
+        :return: List of video clip paths
+        :rtype: list
         """
-        status = True
-        list_of_videos_to_process = []
-        # Collect all the valid video files inside folder
-        for path, _, files in os.walk(dir_path):
-            for filename in files:
-                video_file_path = os.path.join(path, filename)
-                if helper._check_if_valid_video(video_file_path):
-                    list_of_videos_to_process.append(video_file_path)
-
-        # Need to run in two sepearte loops to prevent recursion
-        for video_file_path in list_of_videos_to_process:
-            statusI = self.compress_video(
-                video_file_path,
-                force_overwrite=force_overwrite,
-                crf_parameter=crf_parameter,
-                output_video_codec=output_video_codec,
-                out_dir_path=out_dir_path,
+        clipped_files = []
+        duration = self._get_video_duration_with_cv(file_path)
+        
+        # Calculate break points
+        if break_point_duration_in_sec is None:
+            clip_start, break_point = (
+                0,
+                duration // cpu_count() if duration // cpu_count() > 15 else 25,
+            )
+        else:
+            clip_start, break_point = (
+                0,
+                break_point_duration_in_sec,
             )
-            status = bool(status and statusI)
-        return status
 
-    @FileDecorators.validate_file_path
-    def save_frame_to_disk(self, frame, file_path, file_name, file_ext):
-        """saves an in-memory numpy image array on drive.
+        # Loop over the video duration to get the clip starting point and end point to split the video
+        while clip_start < duration:
+            clip_end = clip_start + break_point
 
-        :param frame: In-memory image. This would have been generated by extract_video_keyframes method
-        :type frame: numpy.ndarray, required
-        :param file_name: name of the image.
-        :type file_name: str, required
-        :param file_path: Folder location where files needs to be saved
-        :type file_path: str, required
-        :param file_ext: File extension indicating the file type for example - '.jpg'
-        :type file_ext: str, required
-        :return: None
-        """
+            # Setting the end position of the particular clip equals to the end time of original clip,
+            # if end position or end position added with the **min_video_duration** is greater than
+            # the end time of original video
+            if clip_end > duration or (clip_end + self._min_video_duration) > duration:
+                clip_end = duration
 
-        file_full_path = os.path.join(file_path, file_name + file_ext)
-        cv2.imwrite(file_full_path, frame)
+            filepath = self._write_videofile(file_path, clip_start, clip_end, override_video_codec)
+            clipped_files.append(filepath)
+            clip_start = clip_end
 
-    @FileDecorators.validate_file_path
-    def _split_with_ffmpeg(self, file_path, override_video_codec=False, break_point_duration_in_sec=None):
-        """Function to split the videos and persist the chunks
+        return clipped_files
+
+    def _split_with_ffmpeg_with_time(self, file_path, chunk_timing, override_video_codec=False, break_point_duration_in_sec=None):
+        """Function to split the videos and persist the chunks, returning timing information
 
         :param file_path: path of video file
         :type file_path: str, required
@@ -550,23 +592,14 @@ def _split_with_ffmpeg(self, file_path, override_video_codec=False, break_point_
         :type override_video_codec: bool, optional
         :param break_point_duration_in_sec: duration in sec for break point
         :type break_point_duration_in_sec: int, optional
-        :return: List of path of splitted video clips
-        :rtype: list
+        :return: Tuple of (list of video paths, list of chunk timing info)
+        :rtype: tuple(list, list)
         """
         clipped_files = []
+        chunk_info = []  # List to store (filepath, start_time, end_time) tuples
         duration = self._get_video_duration_with_cv(file_path)
-        # setting the start point to zero
-        # Setting the breaking point for the clip to be 25 or if video is big
-        # then relative to core available in the machine
-        # If video size is large it makes sense to split videos into chunks
-        # proportional to number of cpu cores. So each cpu core will get on
-        # video to process.
-        # if video duration is divided by cpu_count() then result should be
-        # 15 sec is thumb rule for threshold value it could be set to 25 or
-        # any other value. Logic ensures for large enough videos we don't end
-        # up dividing video in too many clips.
-        # TODO: Try max 5 minutes video
-
+        
+        # Calculate break points
         if break_point_duration_in_sec is None:
             clip_start, break_point = (
                 0,
@@ -580,7 +613,6 @@ def _split_with_ffmpeg(self, file_path, override_video_codec=False, break_point_
 
         # Loop over the video duration to get the clip stating point and end point to split the video
         while clip_start < duration:
-
             clip_end = clip_start + break_point
 
             # Setting the end position of the particular clip equals to the end time of original clip,
@@ -589,12 +621,12 @@ def _split_with_ffmpeg(self, file_path, override_video_codec=False, break_point_
             if clip_end > duration or (clip_end + self._min_video_duration) > duration:
                 clip_end = duration
 
-            clipped_files.append(
-                self._write_videofile(file_path, clip_start, clip_end, override_video_codec)
-            )
-
+            filepath = self._write_videofile(file_path, clip_start, clip_end, override_video_codec)
+            clipped_files.append(filepath)
+            chunk_info.append((filepath, chunk_timing[1] + clip_start, chunk_timing[1] + clip_end))  # Store filepath with start and end times
             clip_start = clip_end
-        return clipped_files
+
+        return clipped_files, chunk_info
 
     def _write_videofile(self, video_file_path, start, end, override_video_codec=False):
         """Function to clip the video for given start and end points and save the video
@@ -747,3 +779,163 @@ def _get_video_duration_with_ffmpeg(self, file_path):
                 f"Here are the file infos returned by ffmpeg:\n\n{infos}"
             )
         return video_duration
+
+    def _calculate_break_duration(self, file_path):
+        """Calculate appropriate break duration for video splitting
+        
+        :param file_path: path of video file
+        :type file_path: str, required
+        :return: Break duration in seconds
+        :rtype: float
+        """
+        break_duration_in_sec = config.Video.video_split_threshold_in_minutes * 60
+
+        video_info = helper.get_video_info(file_path)
+        frame_size_in_bytes = video_info[0]
+        fps = video_info[1]
+
+        free_space_in_bytes = psutil.virtual_memory().available
+        available_memory = config.Video.memory_consumption_threshold * free_space_in_bytes
+
+        no_of_sec_to_reach_threshold = (available_memory / (fps * frame_size_in_bytes)) * config.Video.assumed_no_of_frames_per_candidate_frame
+
+        if break_duration_in_sec > no_of_sec_to_reach_threshold:
+            break_duration_in_sec = math.floor(no_of_sec_to_reach_threshold)
+
+        return break_duration_in_sec
+
+    @FileDecorators.validate_file_path
+    def compress_video(
+        self,
+        file_path,
+        force_overwrite=False,
+        crf_parameter=config.Video.video_compression_crf_parameter,
+        output_video_codec=config.Video.video_compression_codec,
+        out_dir_path="",
+        out_file_name="",
+    ):
+        """Function to compress given input file
+
+        :param file_path: Input file path
+        :type file_path: str
+        :param force_overwrite: optional parameter if True then if there is \
+        already a file in output file location function will overwrite it, defaults to False
+        :type force_overwrite: bool, optional
+        :param crf_parameter: Constant Rate Factor Parameter for controlling \
+        amount of video compression to be applied, The range of the quantizer scale is 0-51:\
+        where 0 is lossless, 23 is default, and 51 is worst possible.\
+        It is recommended to keep this value between 20 and 30 \
+        A lower value is a higher quality, you can change default value by changing \
+        config.Video.video_compression_crf_parameter
+        :type crf_parameter: int, optional
+        :param output_video_codec: Type of video codec to choose, \
+        Currently supported options are libx264 and libx265, libx264 is default option.\
+        libx264 is more widely supported on different operating systems and platforms, \
+        libx265 uses more advanced x265 codec and results in better compression and even less \
+        output video sizes with same or better quality. Right now libx265 is not as widely compatible \
+        on older versions of MacOS and Windows by default. If wider video compatibility is your goal \
+        you should use libx264., you can change default value by changing \
+        Katna.config.Video.video_compression_codec
+        :type output_video_codec: str, optional
+        :param out_dir_path: output folder path where you want output video to be saved, defaults to ""
+        :type out_dir_path: str, optional
+        :param out_file_name: output filename, if not mentioned it will be same as input filename, defaults to ""
+        :type out_file_name: str, optional
+        :raises Exception: raises FileNotFoundError Exception if input video file not found, also exception is raised in case output video file path already exist and force_overwrite is not set to True.
+        :return: Status code. Returns True if video compression was successful, else False
+        :rtype: bool
+        """
+        # TODO add docstring for exception
+        # Add details where libx265 will make sense
+
+        if not helper._check_if_valid_video(file_path):
+            raise Exception("Invalid or corrupted video: " + file_path)
+        # Initialize video compression class
+        video_compressor = VideoCompressor()
+        # Run video compression
+        status = video_compressor.compress_video(
+            file_path,
+            force_overwrite,
+            crf_parameter,
+            output_video_codec,
+            out_dir_path,
+            out_file_name,
+        )
+        return status
+
+    @FileDecorators.validate_dir_path
+    def compress_videos_from_dir(
+        self,
+        dir_path,
+        force_overwrite=False,
+        crf_parameter=config.Video.video_compression_crf_parameter,
+        output_video_codec=config.Video.video_compression_codec,
+        out_dir_path="",
+        out_file_name="",
+    ):
+        """Function to compress input video files in a folder
+
+        :param dir_path: Input folder path
+        :type dir_path: str
+        :param force_overwrite: optional parameter if True then if there is \
+        already a file in output file location function will overwrite it, defaults to False
+        :type force_overwrite: bool, optional
+        :param crf_parameter: Constant Rate Factor Parameter for controlling \
+        amount of video compression to be applied, The range of the quantizer scale is 0-51:\
+        where 0 is lossless, 23 is default, and 51 is worst possible.\
+        It is recommended to keep this value between 20 and 30 \
+        A lower value is a higher quality, you can change default value by changing \
+        config.Video.video_compression_crf_parameter
+        :type crf_parameter: int, optional
+        :param output_video_codec: Type of video codec to choose, \
+        Currently supported options are libx264 and libx265, libx264 is default option.\
+        libx264 is more widely supported on different operating systems and platforms, \
+        libx265 uses more advanced x265 codec and results in better compression and even less \
+        output video sizes with same or better quality. Right now libx265 is not as widely compatible \
+        on older versions of MacOS and Windows by default. If wider video compatibility is your goal \
+        you should use libx264., you can change default value by changing Katna.config.Video.video_compression_codec
+        :type output_video_codec: str, optional
+        :param out_dir_path: output folder path where you want output video to be saved, defaults to ""
+        :type out_dir_path: str, optional
+        :raises Exception: raises FileNotFoundError Exception if input video file not found, also exception is raised in case output video file path already exist and force_overwrite is not set to True.
+        :return: Status code. Returns True if video compression was successful, else False
+        :rtype: bool
+        """
+        status = True
+        list_of_videos_to_process = []
+        # Collect all the valid video files inside folder
+        for path, _, files in os.walk(dir_path):
+            for filename in files:
+                video_file_path = os.path.join(path, filename)
+                if helper._check_if_valid_video(video_file_path):
+                    list_of_videos_to_process.append(video_file_path)
+
+        # Need to run in two separate loops to prevent recursion
+        for video_file_path in list_of_videos_to_process:
+            statusI = self.compress_video(
+                video_file_path,
+                force_overwrite=force_overwrite,
+                crf_parameter=crf_parameter,
+                output_video_codec=output_video_codec,
+                out_dir_path=out_dir_path,
+            )
+            status = bool(status and statusI)
+        return status
+
+    @FileDecorators.validate_file_path
+    def save_frame_to_disk(self, frame, file_path, file_name, file_ext):
+        """saves an in-memory numpy image array on drive.
+
+        :param frame: In-memory image. This would have been generated by extract_video_keyframes method
+        :type frame: numpy.ndarray, required
+        :param file_name: name of the image.
+        :type file_name: str, required
+        :param file_path: Folder location where files needs to be saved
+        :type file_path: str, required
+        :param file_ext: File extension indicating the file type for example - '.jpg'
+        :type file_ext: str, required
+        :return: None
+        """
+
+        file_full_path = os.path.join(file_path, file_name + file_ext)
+        cv2.imwrite(file_full_path, frame)
diff --git a/Katna/writer.py b/Katna/writer.py
index 46cbb54..3d912ff 100644
--- a/Katna/writer.py
+++ b/Katna/writer.py
@@ -73,6 +73,7 @@ def __init__(self, location, file_ext=".jpeg"):
         self.output_dir_path = location
         self.file_ext = file_ext
         self._create_dir(location)
+        self.filepaths = []
 
     def generate_output_filename(self, filepath, keyframe_number):
         """Generates the filename of output data file.
@@ -125,7 +126,7 @@ def write(self, filepath, data):
         for counter, img in enumerate(data):
             output_filename = self.generate_output_filename(filepath, keyframe_number=counter)
             self.save_frame_data_to_disk(img, file_name=output_filename)
-
+            self.filepaths.append(output_filename)
 
 class  ImageCropDiskWriter(Writer):
     """DiskWriter for Image Crop