From f0f4bf27e54405e5562a841e7d4a9982d9508d18 Mon Sep 17 00:00:00 2001 From: firestaerter3 <17737913+firestaerter3@users.noreply.github.com> Date: Fri, 6 Mar 2026 10:25:29 +0100 Subject: [PATCH 1/2] feat: add ffprobe metadata extraction for video profile, level, bit depth, ref frames and audio language Adds a new get_stream_metadata() function to stream_check_utils.py that runs a fast ffprobe JSON call (~2-3s) before the existing ffmpeg bitrate analysis. Extracts structured metadata not reliably available from ffmpeg debug output: Video: profile (High/Main), level (e.g. 42), bit depth (8/10), ref frame count Audio: language tag (ISO 639-2), sample rate, channel layout Results are merged into the analyze_stream() result dict and included in the stream_stats payload written to Dispatcharr by both _update_stream_stats() and _prepare_stream_stats_for_batch(). This enables downstream consumers (e.g. Emby/Jellyfin plugins) to make correct direct-play vs transcode decisions for HEVC 10-bit content, high ref-frame H.264, and multi-language streams without probing at playback time. Co-Authored-By: Claude Sonnet 4.6 --- backend/stream_check_utils.py | 132 +++++++++++++++++++++++++++++- backend/stream_checker_service.py | 28 +++++-- 2 files changed, 153 insertions(+), 7 deletions(-) diff --git a/backend/stream_check_utils.py b/backend/stream_check_utils.py index c5025f87..e9bf14b4 100644 --- a/backend/stream_check_utils.py +++ b/backend/stream_check_utils.py @@ -537,6 +537,129 @@ def get_stream_info_and_bitrate(url: str, duration: int = 30, timeout: int = 30, return result_data +def get_stream_metadata(url: str, timeout: int = 15, user_agent: str = 'VLC/3.0.14') -> Dict[str, Any]: + """ + Extract stream metadata using a quick ffprobe JSON call. + + Runs ffprobe once to collect structured metadata that ffmpeg debug output + does not expose reliably: H.264/HEVC profile, level, bit depth, reference + frames, audio language, sample rate, and channel layout. + + Args: + url: Stream URL to analyse + timeout: Timeout in seconds (default: 15) + user_agent: User-Agent string for HTTP requests + + Returns: + Dictionary with any of the following keys that could be extracted: + - video_profile (str): e.g. "High", "Main" + - video_level (int): raw ffprobe level integer, e.g. 42 for Level 4.2 + - video_bit_depth (int): e.g. 8, 10 + - video_ref_frames (int): reference frame count + - audio_language (str): ISO 639-2 language tag, e.g. "eng", "dut" + - sample_rate (int): audio sample rate in Hz, e.g. 48000 + - audio_channels (str): normalised channel layout, e.g. "5.1", "stereo" + Returns empty dict on any failure. + """ + command = [ + 'ffprobe', + '-user_agent', user_agent, + '-v', 'quiet', + '-print_format', 'json', + '-show_streams', + '-show_entries', + 'stream=codec_type,profile,level,bit_depth,refs,sample_rate,channels,channel_layout' + ':stream_tags=language', + '-read_ahead_limit', '5M', + '-analyzeduration', '5000000', + url, + ] + + try: + result = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=timeout, + text=True, + ) + + if not result.stdout: + logger.debug(f"ffprobe metadata: no output for {url[:50]}...") + return {} + + data = json.loads(result.stdout) + streams = data.get('streams', []) + metadata = {} + + # Video stream + video_stream = next((s for s in streams if s.get('codec_type') == 'video'), None) + if video_stream: + profile = video_stream.get('profile') + if profile and profile.lower() not in ('unknown', 'n/a', 'none', ''): + metadata['video_profile'] = profile + + level = video_stream.get('level') + if level is not None: + try: + lvl_int = int(level) + if lvl_int > 0: + metadata['video_level'] = lvl_int + except (ValueError, TypeError): + pass + + bit_depth = video_stream.get('bit_depth') + if bit_depth is not None: + try: + metadata['video_bit_depth'] = int(bit_depth) + except (ValueError, TypeError): + pass + + refs = video_stream.get('refs') + if refs is not None: + try: + metadata['video_ref_frames'] = int(refs) + except (ValueError, TypeError): + pass + + # Audio stream + audio_stream = next((s for s in streams if s.get('codec_type') == 'audio'), None) + if audio_stream: + tags = audio_stream.get('tags', {}) or {} + language = tags.get('language', '') + if language and language.lower() not in ('und', 'n/a', ''): + metadata['audio_language'] = language + + sample_rate = audio_stream.get('sample_rate') + if sample_rate: + try: + metadata['sample_rate'] = int(sample_rate) + except (ValueError, TypeError): + pass + + # Normalise channel_layout: strip trailing "(side)" / "(back)" qualifiers + # so "5.1(side)" becomes "5.1", which ParseAudioChannelCount can parse. + channel_layout = audio_stream.get('channel_layout', '') + if channel_layout and channel_layout.lower() not in ('unknown', ''): + paren = channel_layout.find('(') + normalised = channel_layout[:paren].strip() if paren > 0 else channel_layout.strip() + if normalised: + metadata['audio_channels'] = normalised + + logger.debug(f"ffprobe metadata for {url[:50]}...: {metadata}") + return metadata + + except subprocess.TimeoutExpired: + logger.warning(f"ffprobe metadata timeout ({timeout}s) for {url[:50]}...") + return {} + except json.JSONDecodeError as e: + logger.warning(f"ffprobe metadata JSON decode error for {url[:50]}...: {e}") + return {} + except Exception as e: + logger.warning(f"ffprobe metadata failed for {url[:50]}...: {e}") + return {} + + def get_stream_bitrate(url: str, duration: int = 30, timeout: int = 30, user_agent: str = 'VLC/3.0.14', stream_startup_buffer: int = 10) -> Tuple[Optional[float], str, float]: """ Get stream bitrate using ffmpeg to analyze actual stream data. @@ -762,6 +885,12 @@ def analyze_stream( stream_startup_buffer=stream_startup_buffer ) + # Augment with structured metadata from a fast ffprobe call. + # This adds profile, level, bit depth, ref frames, audio language, + # sample rate, and channel layout — fields not reliably extractable + # from ffmpeg debug output. + metadata = get_stream_metadata(stream_url, timeout=15, user_agent=user_agent) + # Build result dictionary with metadata result = { 'stream_id': stream_id, @@ -773,8 +902,9 @@ def analyze_stream( 'resolution': result_data['resolution'], 'fps': result_data['fps'], 'bitrate_kbps': result_data['bitrate_kbps'], - 'status': result_data['status'] + 'status': result_data['status'], } + result.update(metadata) # Log results # In debug mode, show detailed multi-line logs diff --git a/backend/stream_checker_service.py b/backend/stream_checker_service.py index b66f68d3..08ee79a5 100644 --- a/backend/stream_checker_service.py +++ b/backend/stream_checker_service.py @@ -1460,15 +1460,23 @@ def _update_stream_stats(self, stream_data: Dict) -> bool: "video_codec": stream_data.get("video_codec"), "audio_codec": stream_data.get("audio_codec"), "ffmpeg_output_bitrate": int(stream_data.get("bitrate_kbps")) if stream_data.get("bitrate_kbps") not in ["N/A", None] else None, + # Extended metadata from ffprobe + "video_profile": stream_data.get("video_profile"), + "video_level": stream_data.get("video_level"), + "video_bit_depth": stream_data.get("video_bit_depth"), + "video_ref_frames": stream_data.get("video_ref_frames"), + "audio_language": stream_data.get("audio_language"), + "sample_rate": stream_data.get("sample_rate"), + "audio_channels": stream_data.get("audio_channels"), } - + # Clean up the payload, removing any None values or N/A values stream_stats_payload = {k: v for k, v in stream_stats_payload.items() if v not in [None, "N/A"]} - + if not stream_stats_payload: logger.debug(f"No data to update for stream {stream_id}. Skipping.") return False - + # Construct the URL for the specific stream stream_url = f"{base_url}/api/channels/streams/{int(stream_id)}/" @@ -1535,15 +1543,23 @@ def _prepare_stream_stats_for_batch(self, stream_data: Dict) -> Optional[Dict[st "video_codec": stream_data.get("video_codec"), "audio_codec": stream_data.get("audio_codec"), "ffmpeg_output_bitrate": int(stream_data.get("bitrate_kbps")) if stream_data.get("bitrate_kbps") not in ["N/A", None] else None, + # Extended metadata from ffprobe + "video_profile": stream_data.get("video_profile"), + "video_level": stream_data.get("video_level"), + "video_bit_depth": stream_data.get("video_bit_depth"), + "video_ref_frames": stream_data.get("video_ref_frames"), + "audio_language": stream_data.get("audio_language"), + "sample_rate": stream_data.get("sample_rate"), + "audio_channels": stream_data.get("audio_channels"), } - + # Clean up the payload, removing any None values or N/A values stream_stats_payload = {k: v for k, v in stream_stats_payload.items() if v not in [None, "N/A"]} - + if not stream_stats_payload: logger.debug(f"No data to update for stream {stream_id}. Skipping.") return None - + return { 'stream_id': stream_id, 'stream_stats': stream_stats_payload From 736ccb683c11e3807c5a0769ef0637589b3a73a4 Mon Sep 17 00:00:00 2001 From: firestaerter3 <17737913+firestaerter3@users.noreply.github.com> Date: Fri, 6 Mar 2026 20:11:07 +0100 Subject: [PATCH 2/2] refactor: extract metadata from ffmpeg stderr instead of separate ffprobe call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the standalone ffprobe JSON call with inline parsing of ffmpeg's existing stderr output. All new fields (video_profile, video_bit_depth, audio_language, sample_rate, audio_channels) are extracted from the stream info lines that ffmpeg already prints during analysis — no second upstream connection is opened. video_level and video_ref_frames are dropped; these are not reliably present in ffmpeg stderr output. --- backend/stream_check_utils.py | 188 +++++++++--------------------- backend/stream_checker_service.py | 4 +- 2 files changed, 59 insertions(+), 133 deletions(-) diff --git a/backend/stream_check_utils.py b/backend/stream_check_utils.py index e9bf14b4..1851e732 100644 --- a/backend/stream_check_utils.py +++ b/backend/stream_check_utils.py @@ -362,7 +362,12 @@ def get_stream_info_and_bitrate(url: str, duration: int = 30, timeout: int = 30, 'fps': 0, 'bitrate_kbps': None, 'status': 'OK', - 'elapsed_time': 0 + 'elapsed_time': 0, + 'video_profile': None, + 'video_bit_depth': None, + 'audio_language': None, + 'sample_rate': None, + 'audio_channels': None, } # Add buffer to timeout to account for ffmpeg startup, network latency, and shutdown overhead @@ -435,6 +440,25 @@ def get_stream_info_and_bitrate(url: str, duration: int = 30, timeout: int = 30, if fps_match: result_data['fps'] = round(float(fps_match.group(1)), 2) logger.debug(f" → Detected FPS: {result_data['fps']}") + + # Extract video profile — e.g. "h264 (High)" or "hevc (Main 10)" + # Skip if it looks like a codec alias (contains / or 0x) + profile_match = re.search(r'Video:\s+\w+\s+\(([^)]+)\)', line) + if profile_match: + candidate = profile_match.group(1).strip() + if '/' not in candidate and '0x' not in candidate: + result_data['video_profile'] = candidate + logger.debug(f" → Detected video profile: {candidate}") + + # Extract bit depth from pixel format + # e.g. yuv420p10le → 10, yuv420p12le → 12, yuv420p → 8 + depth_match = re.search(r'yuv[j\d]+p(\d{2})', line) + if depth_match: + result_data['video_bit_depth'] = int(depth_match.group(1)) + logger.debug(f" → Detected bit depth: {result_data['video_bit_depth']}") + elif re.search(r'yuv[j\d]+p\b', line): + result_data['video_bit_depth'] = 8 + logger.debug(f" → Detected bit depth: 8 (standard yuv)") except (ValueError, AttributeError) as e: logger.debug(f" → Error parsing video stream line: {e}") @@ -454,6 +478,32 @@ def get_stream_info_and_bitrate(url: str, duration: int = 30, timeout: int = 30, if audio_codec != 'N/A': result_data['audio_codec'] = audio_codec logger.debug(f" → Final audio codec: {result_data['audio_codec']}") + + # Extract audio language from stream specifier e.g. "Stream #0:1(eng):" + lang_match = re.search(r'Stream #\d+:\d+\((\w+)\):', line) + if lang_match: + lang = lang_match.group(1) + if lang.lower() not in ('und', 'unknown'): + result_data['audio_language'] = lang + logger.debug(f" → Detected audio language: {lang}") + + # Extract sample rate e.g. "48000 Hz" + sr_match = re.search(r'(\d+)\s+Hz', line) + if sr_match: + result_data['sample_rate'] = int(sr_match.group(1)) + logger.debug(f" → Detected sample rate: {result_data['sample_rate']}") + + # Extract channel layout e.g. "stereo", "5.1(side)", "7.1" + # Strip trailing (side)/(back) qualifiers so "5.1(side)" → "5.1" + ch_match = re.search(r'\d+\s+Hz,\s+([^,\s]+)', line) + if ch_match: + layout = ch_match.group(1) + paren = layout.find('(') + if paren > 0: + layout = layout[:paren] + if layout: + result_data['audio_channels'] = layout.strip() + logger.debug(f" → Detected audio channels: {result_data['audio_channels']}") except (ValueError, AttributeError) as e: logger.debug(f" → Error parsing audio stream line: {e}") @@ -537,128 +587,6 @@ def get_stream_info_and_bitrate(url: str, duration: int = 30, timeout: int = 30, return result_data -def get_stream_metadata(url: str, timeout: int = 15, user_agent: str = 'VLC/3.0.14') -> Dict[str, Any]: - """ - Extract stream metadata using a quick ffprobe JSON call. - - Runs ffprobe once to collect structured metadata that ffmpeg debug output - does not expose reliably: H.264/HEVC profile, level, bit depth, reference - frames, audio language, sample rate, and channel layout. - - Args: - url: Stream URL to analyse - timeout: Timeout in seconds (default: 15) - user_agent: User-Agent string for HTTP requests - - Returns: - Dictionary with any of the following keys that could be extracted: - - video_profile (str): e.g. "High", "Main" - - video_level (int): raw ffprobe level integer, e.g. 42 for Level 4.2 - - video_bit_depth (int): e.g. 8, 10 - - video_ref_frames (int): reference frame count - - audio_language (str): ISO 639-2 language tag, e.g. "eng", "dut" - - sample_rate (int): audio sample rate in Hz, e.g. 48000 - - audio_channels (str): normalised channel layout, e.g. "5.1", "stereo" - Returns empty dict on any failure. - """ - command = [ - 'ffprobe', - '-user_agent', user_agent, - '-v', 'quiet', - '-print_format', 'json', - '-show_streams', - '-show_entries', - 'stream=codec_type,profile,level,bit_depth,refs,sample_rate,channels,channel_layout' - ':stream_tags=language', - '-read_ahead_limit', '5M', - '-analyzeduration', '5000000', - url, - ] - - try: - result = subprocess.run( - command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - timeout=timeout, - text=True, - ) - - if not result.stdout: - logger.debug(f"ffprobe metadata: no output for {url[:50]}...") - return {} - - data = json.loads(result.stdout) - streams = data.get('streams', []) - metadata = {} - - # Video stream - video_stream = next((s for s in streams if s.get('codec_type') == 'video'), None) - if video_stream: - profile = video_stream.get('profile') - if profile and profile.lower() not in ('unknown', 'n/a', 'none', ''): - metadata['video_profile'] = profile - - level = video_stream.get('level') - if level is not None: - try: - lvl_int = int(level) - if lvl_int > 0: - metadata['video_level'] = lvl_int - except (ValueError, TypeError): - pass - - bit_depth = video_stream.get('bit_depth') - if bit_depth is not None: - try: - metadata['video_bit_depth'] = int(bit_depth) - except (ValueError, TypeError): - pass - - refs = video_stream.get('refs') - if refs is not None: - try: - metadata['video_ref_frames'] = int(refs) - except (ValueError, TypeError): - pass - - # Audio stream - audio_stream = next((s for s in streams if s.get('codec_type') == 'audio'), None) - if audio_stream: - tags = audio_stream.get('tags', {}) or {} - language = tags.get('language', '') - if language and language.lower() not in ('und', 'n/a', ''): - metadata['audio_language'] = language - - sample_rate = audio_stream.get('sample_rate') - if sample_rate: - try: - metadata['sample_rate'] = int(sample_rate) - except (ValueError, TypeError): - pass - - # Normalise channel_layout: strip trailing "(side)" / "(back)" qualifiers - # so "5.1(side)" becomes "5.1", which ParseAudioChannelCount can parse. - channel_layout = audio_stream.get('channel_layout', '') - if channel_layout and channel_layout.lower() not in ('unknown', ''): - paren = channel_layout.find('(') - normalised = channel_layout[:paren].strip() if paren > 0 else channel_layout.strip() - if normalised: - metadata['audio_channels'] = normalised - - logger.debug(f"ffprobe metadata for {url[:50]}...: {metadata}") - return metadata - - except subprocess.TimeoutExpired: - logger.warning(f"ffprobe metadata timeout ({timeout}s) for {url[:50]}...") - return {} - except json.JSONDecodeError as e: - logger.warning(f"ffprobe metadata JSON decode error for {url[:50]}...: {e}") - return {} - except Exception as e: - logger.warning(f"ffprobe metadata failed for {url[:50]}...: {e}") - return {} - def get_stream_bitrate(url: str, duration: int = 30, timeout: int = 30, user_agent: str = 'VLC/3.0.14', stream_startup_buffer: int = 10) -> Tuple[Optional[float], str, float]: """ @@ -885,13 +813,7 @@ def analyze_stream( stream_startup_buffer=stream_startup_buffer ) - # Augment with structured metadata from a fast ffprobe call. - # This adds profile, level, bit depth, ref frames, audio language, - # sample rate, and channel layout — fields not reliably extractable - # from ffmpeg debug output. - metadata = get_stream_metadata(stream_url, timeout=15, user_agent=user_agent) - - # Build result dictionary with metadata + # Build result dictionary result = { 'stream_id': stream_id, 'stream_name': stream_name, @@ -903,8 +825,12 @@ def analyze_stream( 'fps': result_data['fps'], 'bitrate_kbps': result_data['bitrate_kbps'], 'status': result_data['status'], + 'video_profile': result_data.get('video_profile'), + 'video_bit_depth': result_data.get('video_bit_depth'), + 'audio_language': result_data.get('audio_language'), + 'sample_rate': result_data.get('sample_rate'), + 'audio_channels': result_data.get('audio_channels'), } - result.update(metadata) # Log results # In debug mode, show detailed multi-line logs diff --git a/backend/stream_checker_service.py b/backend/stream_checker_service.py index 08ee79a5..8804649a 100644 --- a/backend/stream_checker_service.py +++ b/backend/stream_checker_service.py @@ -1460,7 +1460,7 @@ def _update_stream_stats(self, stream_data: Dict) -> bool: "video_codec": stream_data.get("video_codec"), "audio_codec": stream_data.get("audio_codec"), "ffmpeg_output_bitrate": int(stream_data.get("bitrate_kbps")) if stream_data.get("bitrate_kbps") not in ["N/A", None] else None, - # Extended metadata from ffprobe + # Extended stream metadata "video_profile": stream_data.get("video_profile"), "video_level": stream_data.get("video_level"), "video_bit_depth": stream_data.get("video_bit_depth"), @@ -1543,7 +1543,7 @@ def _prepare_stream_stats_for_batch(self, stream_data: Dict) -> Optional[Dict[st "video_codec": stream_data.get("video_codec"), "audio_codec": stream_data.get("audio_codec"), "ffmpeg_output_bitrate": int(stream_data.get("bitrate_kbps")) if stream_data.get("bitrate_kbps") not in ["N/A", None] else None, - # Extended metadata from ffprobe + # Extended stream metadata "video_profile": stream_data.get("video_profile"), "video_level": stream_data.get("video_level"), "video_bit_depth": stream_data.get("video_bit_depth"),