From 9ab0c88e7b4c27d18e0e92a84540eefd3fb1346f Mon Sep 17 00:00:00 2001 From: Sebastian Annies Date: Mon, 9 Feb 2026 21:21:15 +0200 Subject: [PATCH 1/2] use concat.txt --- pycaption/filtergraph.py | 75 ++++++++++++++-------------- tests/test_filtergraph.py | 102 ++++++++++++++++++++++++++++++-------- 2 files changed, 119 insertions(+), 58 deletions(-) diff --git a/pycaption/filtergraph.py b/pycaption/filtergraph.py index 1de1a7d0..ede44a5b 100644 --- a/pycaption/filtergraph.py +++ b/pycaption/filtergraph.py @@ -2,19 +2,21 @@ import zipfile from io import BytesIO +from PIL import Image + from pycaption.base import CaptionSet from pycaption.subtitler_image_based import SubtitleImageBasedWriter class FiltergraphWriter(SubtitleImageBasedWriter): """ - FFmpeg filtergraph writer for image-based subtitles. + FFmpeg subtitle image writer using concat demuxer. - Generates PNG subtitle images and an FFmpeg filtergraph that can be used - to create a transparent WebM video with subtitle overlays. + Generates PNG subtitle images and an FFmpeg concat demuxer file that + sequences blank (transparent) and subtitle images with proper timing. + The concat file can be fed directly to ffmpeg as an input. - By default, generates Full HD (1920x1080) images. The filtergraph uses - the overlay filter with timing to display each subtitle at the correct time. + By default, generates Full HD (1920x1080) images. Uses PNG format for images with 4-color indexed palette for optimal compression (~6 KB per Full HD image). @@ -62,12 +64,15 @@ def write( align='center' ): """ - Write captions as PNG images with an FFmpeg filtergraph for creating - a transparent WebM video overlay. + Write captions as PNG images with an FFmpeg concat demuxer file. Returns a ZIP file containing: - - PNG subtitle images in the specified image_dir - - filtergraph.txt: FFmpeg filter_complex script + - PNG subtitle images in the specified output_dir + - blank.png: Transparent image for gaps between subtitles + - concat.txt: FFmpeg concat demuxer file with timing + + The concat.txt can be used directly as ffmpeg input: + ffmpeg -f concat -safe 0 -i {output_dir}/concat.txt ... :param caption_set: CaptionSet containing the captions to write :param position: Position of subtitles ('top', 'bottom', 'source') @@ -84,37 +89,32 @@ def write( caps, lang, tmpDir, position, align, avoid_same_next_start_prev_end ) - # Calculate total duration (last end time) - max_end = max(cap_list[0].end for cap_list in caps_final) - duration_seconds = max_end / 1_000_000 + 1 # Add 1 second buffer - - # Build FFmpeg filtergraph - # Start with transparent base - filter_parts = [] - filter_parts.append( - f"color=c=black@0:s={self.video_width}x{self.video_height}:d={duration_seconds:.3f},format=yuva444p[base]" - ) - - # Load each image (paths relative to where ffmpeg is run) - for i in range(1, len(caps_final) + 1): - filter_parts.append( - f"movie={self.output_dir}/subtitle{i:04d}.png,format=yuva444p[s{i}]" - ) + # Create blank transparent image for gaps between subtitles + blank = Image.new('RGBA', (self.video_width, self.video_height), (0, 0, 0, 0)) + blank.save(tmpDir + '/blank.png', optimize=True, compress_level=9) - # Chain overlays - prev_label = "base" + # Build concat demuxer file that sequences blank/subtitle images + concat_lines = ['ffconcat version 1.0'] + prev_end_us = 0 for i, cap_list in enumerate(caps_final, 1): - start_sec = self.format_ts_seconds(cap_list[0].start) - end_sec = self.format_ts_seconds(cap_list[0].end) - next_label = f"v{i}" if i < len(caps_final) else "out" + start_us = cap_list[0].start + end_us = cap_list[0].end + + # Gap before this subtitle + gap_us = start_us - prev_end_us + if gap_us > 0: + concat_lines.append('file blank.png') + concat_lines.append(f'duration {gap_us / 1_000_000:.3f}') - filter_parts.append( - f"[{prev_label}][s{i}]overlay=x=0:y=0:enable='between(t,{start_sec},{end_sec})':format=auto[{next_label}]" - ) - prev_label = next_label + # Subtitle image + concat_lines.append(f'file subtitle{i:04d}.png') + concat_lines.append(f'duration {(end_us - start_us) / 1_000_000:.3f}') - filtergraph = ";\n".join(filter_parts) + prev_end_us = end_us + # Trailing blank so the last subtitle doesn't freeze on screen + concat_lines.append('file blank.png') + concat_text = '\n'.join(concat_lines) # Create ZIP archive with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf: @@ -123,8 +123,9 @@ def write( img_path = tmpDir + '/subtitle%04d.png' % i zf.write(img_path, f'{self.output_dir}/subtitle{i:04d}.png') - # Add filtergraph - zf.writestr(f'{self.output_dir}/filtergraph.txt', filtergraph) + # Add blank image and concat file + zf.write(tmpDir + '/blank.png', f'{self.output_dir}/blank.png') + zf.writestr(f'{self.output_dir}/concat.txt', concat_text) buf.seek(0) return buf.read() diff --git a/tests/test_filtergraph.py b/tests/test_filtergraph.py index d1fae66e..55fea253 100644 --- a/tests/test_filtergraph.py +++ b/tests/test_filtergraph.py @@ -28,10 +28,12 @@ def test_zip_contents(self): names = zf.namelist() assert 'embedded_subs/subtitle0001.png' in names assert 'embedded_subs/subtitle0002.png' in names + assert 'embedded_subs/blank.png' in names + assert 'embedded_subs/concat.txt' in names assert 'embedded_subs/filtergraph.txt' in names def test_filtergraph_structure(self): - """Test that filtergraph has correct structure.""" + """Test that filtergraph uses concat demuxer via single movie filter.""" srt_content = """1 00:00:01,000 --> 00:00:04,000 Test @@ -46,12 +48,50 @@ def test_filtergraph_structure(self): assert 'color=c=black@0:s=1920x1080' in filtergraph assert 'format=yuva444p' in filtergraph - # Should have movie input for image (path relative to ffmpeg working dir) - assert 'movie=embedded_subs/subtitle0001.png' in filtergraph + # Should use single movie filter with concat demuxer + assert 'movie=embedded_subs/concat.txt:f=concat' in filtergraph - # Should have overlay with timing - assert 'overlay=x=0:y=0:enable=' in filtergraph - assert "between(t,1.000,4.000)" in filtergraph + # Should have single overlay to [out] + assert '[base][subs]overlay=0:0:format=auto[out]' in filtergraph + + def test_concat_file_structure(self): + """Test that concat demuxer file has correct timing entries.""" + srt_content = """1 +00:00:01,000 --> 00:00:04,000 +First + +2 +00:00:05,000 --> 00:00:08,000 +Second +""" + caption_set = SRTReader().read(srt_content) + zip_data = self.writer.write(caption_set) + + with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: + concat = zf.read('embedded_subs/concat.txt').decode() + + assert 'ffconcat version 1.0' in concat + + # Gap before first subtitle (1 second) + assert 'file blank.png\nduration 1.000' in concat + + # First subtitle (3 seconds) + assert 'file subtitle0001.png\nduration 3.000' in concat + + # Gap between subtitles (1 second) + # After first sub ends at 4s, second starts at 5s + lines = concat.split('\n') + # Find the gap between subtitle0001 and subtitle0002 + idx_sub1 = next(i for i, l in enumerate(lines) if 'subtitle0001' in l) + # After subtitle0001 duration, should be blank with 1s gap + assert lines[idx_sub1 + 2] == 'file blank.png' + assert lines[idx_sub1 + 3] == 'duration 1.000' + + # Second subtitle (3 seconds) + assert 'file subtitle0002.png\nduration 3.000' in concat + + # Trailing blank at the end + assert lines[-1] == 'file blank.png' def test_custom_output_dir(self): """Test custom output directory.""" @@ -66,13 +106,15 @@ def test_custom_output_dir(self): with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: names = zf.namelist() assert 'subs/subtitle0001.png' in names + assert 'subs/blank.png' in names + assert 'subs/concat.txt' in names assert 'subs/filtergraph.txt' in names filtergraph = zf.read('subs/filtergraph.txt').decode() - assert 'movie=subs/subtitle0001.png' in filtergraph + assert 'movie=subs/concat.txt:f=concat' in filtergraph - def test_multiple_overlays_chained(self): - """Test that multiple subtitles are chained correctly.""" + def test_multiple_subtitles_in_concat(self): + """Test that multiple subtitles produce correct concat entries.""" srt_content = """1 00:00:01,000 --> 00:00:04,000 First @@ -89,20 +131,16 @@ def test_multiple_overlays_chained(self): zip_data = self.writer.write(caption_set) with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: - filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() + concat = zf.read('embedded_subs/concat.txt').decode() - # Should have 3 movie inputs - assert 'movie=embedded_subs/subtitle0001.png' in filtergraph - assert 'movie=embedded_subs/subtitle0002.png' in filtergraph - assert 'movie=embedded_subs/subtitle0003.png' in filtergraph + # All three subtitle images referenced + assert 'file subtitle0001.png' in concat + assert 'file subtitle0002.png' in concat + assert 'file subtitle0003.png' in concat - # Should have chained overlays - assert '[base][s1]overlay' in filtergraph - assert '[v1][s2]overlay' in filtergraph - assert '[v2][s3]overlay' in filtergraph - - # Last one should output to [out] - assert '[out]' in filtergraph + # Filtergraph should be simple - no per-subtitle movie filters + filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() + assert filtergraph.count('movie=') == 1 # single movie filter def test_duration_calculation(self): """Test that duration is calculated from last subtitle end time.""" @@ -136,3 +174,25 @@ def test_custom_resolution(self): with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() assert 's=1280x720' in filtergraph + + def test_back_to_back_subtitles_no_gap(self): + """Test subtitles with no gap between them produce no blank entry.""" + srt_content = """1 +00:00:01,000 --> 00:00:04,000 +First + +2 +00:00:04,000 --> 00:00:07,000 +Immediately after +""" + caption_set = SRTReader().read(srt_content) + zip_data = self.writer.write(caption_set) + + with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: + concat = zf.read('embedded_subs/concat.txt').decode() + lines = concat.split('\n') + + # Find subtitle0001 line + idx_sub1 = next(i for i, l in enumerate(lines) if 'subtitle0001' in l) + # Next file entry should be subtitle0002 directly (no blank in between) + assert lines[idx_sub1 + 2] == 'file subtitle0002.png' From 65d4e5e71362b2de91b89d3529856664f3b8cf92 Mon Sep 17 00:00:00 2001 From: Sebastian Annies Date: Tue, 10 Feb 2026 11:35:17 +0200 Subject: [PATCH 2/2] remove filtergraph - too many open files --- tests/test_filtergraph.py | 78 ++++++++++++++------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/tests/test_filtergraph.py b/tests/test_filtergraph.py index 55fea253..574a01b8 100644 --- a/tests/test_filtergraph.py +++ b/tests/test_filtergraph.py @@ -6,7 +6,7 @@ class TestFiltergraphWriterTestCase: - """Tests for the FiltergraphWriter that generates FFmpeg filtergraphs.""" + """Tests for the FiltergraphWriter that generates FFmpeg concat demuxer files.""" def setup_method(self): self.writer = FiltergraphWriter() @@ -30,10 +30,9 @@ def test_zip_contents(self): assert 'embedded_subs/subtitle0002.png' in names assert 'embedded_subs/blank.png' in names assert 'embedded_subs/concat.txt' in names - assert 'embedded_subs/filtergraph.txt' in names - def test_filtergraph_structure(self): - """Test that filtergraph uses concat demuxer via single movie filter.""" + def test_concat_header(self): + """Test that concat file starts with ffconcat header.""" srt_content = """1 00:00:01,000 --> 00:00:04,000 Test @@ -42,17 +41,8 @@ def test_filtergraph_structure(self): zip_data = self.writer.write(caption_set) with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: - filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() - - # Should have color source for transparent base - assert 'color=c=black@0:s=1920x1080' in filtergraph - assert 'format=yuva444p' in filtergraph - - # Should use single movie filter with concat demuxer - assert 'movie=embedded_subs/concat.txt:f=concat' in filtergraph - - # Should have single overlay to [out] - assert '[base][subs]overlay=0:0:format=auto[out]' in filtergraph + concat = zf.read('embedded_subs/concat.txt').decode() + assert concat.startswith('ffconcat version 1.0') def test_concat_file_structure(self): """Test that concat demuxer file has correct timing entries.""" @@ -70,8 +60,6 @@ def test_concat_file_structure(self): with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: concat = zf.read('embedded_subs/concat.txt').decode() - assert 'ffconcat version 1.0' in concat - # Gap before first subtitle (1 second) assert 'file blank.png\nduration 1.000' in concat @@ -79,11 +67,8 @@ def test_concat_file_structure(self): assert 'file subtitle0001.png\nduration 3.000' in concat # Gap between subtitles (1 second) - # After first sub ends at 4s, second starts at 5s lines = concat.split('\n') - # Find the gap between subtitle0001 and subtitle0002 idx_sub1 = next(i for i, l in enumerate(lines) if 'subtitle0001' in l) - # After subtitle0001 duration, should be blank with 1s gap assert lines[idx_sub1 + 2] == 'file blank.png' assert lines[idx_sub1 + 3] == 'duration 1.000' @@ -108,10 +93,6 @@ def test_custom_output_dir(self): assert 'subs/subtitle0001.png' in names assert 'subs/blank.png' in names assert 'subs/concat.txt' in names - assert 'subs/filtergraph.txt' in names - - filtergraph = zf.read('subs/filtergraph.txt').decode() - assert 'movie=subs/concat.txt:f=concat' in filtergraph def test_multiple_subtitles_in_concat(self): """Test that multiple subtitles produce correct concat entries.""" @@ -133,36 +114,12 @@ def test_multiple_subtitles_in_concat(self): with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: concat = zf.read('embedded_subs/concat.txt').decode() - # All three subtitle images referenced assert 'file subtitle0001.png' in concat assert 'file subtitle0002.png' in concat assert 'file subtitle0003.png' in concat - # Filtergraph should be simple - no per-subtitle movie filters - filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() - assert filtergraph.count('movie=') == 1 # single movie filter - - def test_duration_calculation(self): - """Test that duration is calculated from last subtitle end time.""" - srt_content = """1 -00:00:01,000 --> 00:00:04,000 -First - -2 -00:01:30,000 --> 00:01:35,500 -Last one at 1:35 -""" - caption_set = SRTReader().read(srt_content) - zip_data = self.writer.write(caption_set) - - with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: - filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() - - # Duration should be ~96.5 seconds (95.5 + 1 buffer) - assert 'd=96.500' in filtergraph - def test_custom_resolution(self): - """Test custom video resolution.""" + """Test custom video resolution produces correctly sized images.""" writer = FiltergraphWriter(video_width=1280, video_height=720) srt_content = """1 00:00:01,000 --> 00:00:04,000 @@ -172,8 +129,11 @@ def test_custom_resolution(self): zip_data = writer.write(caption_set) with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: - filtergraph = zf.read('embedded_subs/filtergraph.txt').decode() - assert 's=1280x720' in filtergraph + # Verify blank image has correct dimensions + from PIL import Image + blank_data = zf.read('embedded_subs/blank.png') + img = Image.open(BytesIO(blank_data)) + assert img.size == (1280, 720) def test_back_to_back_subtitles_no_gap(self): """Test subtitles with no gap between them produce no blank entry.""" @@ -196,3 +156,19 @@ def test_back_to_back_subtitles_no_gap(self): idx_sub1 = next(i for i, l in enumerate(lines) if 'subtitle0001' in l) # Next file entry should be subtitle0002 directly (no blank in between) assert lines[idx_sub1 + 2] == 'file subtitle0002.png' + + def test_subtitle_starting_at_zero(self): + """Test subtitle starting at time 0 produces no leading blank.""" + srt_content = """1 +00:00:00,000 --> 00:00:03,000 +Starts immediately +""" + caption_set = SRTReader().read(srt_content) + zip_data = self.writer.write(caption_set) + + with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf: + concat = zf.read('embedded_subs/concat.txt').decode() + lines = concat.split('\n') + + # First file entry should be the subtitle, not blank + assert lines[1] == 'file subtitle0001.png' \ No newline at end of file