karilaa-dev · karilaa-dev · Jan 16, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/tiktok_api/client.py b/tiktok_api/client.py
@@ -29,10 +29,22 @@
 # This ensures impersonation targets update automatically with yt-dlp
 try:
     from yt_dlp.networking._curlcffi import BROWSER_TARGETS, _TARGETS_COMPAT_LOOKUP
+    from yt_dlp.networking.impersonate import ImpersonateTarget
 except ImportError:
     # Fallback if yt-dlp structure changes or curl_cffi not available during import
     BROWSER_TARGETS = {}
     _TARGETS_COMPAT_LOOKUP = {}
+    ImpersonateTarget = None
+
+# Note: on the yt-dlp side, CurlCFFIRH already applies the proxy per request via
+# session.curl.setopt(CurlOpt.PROXY, proxy) in _send(), so no monkey-patching is
+# needed; its session caching by cookiejar is fine because the proxy is set each time.
+
+# TikTok's WAF blocks newer Chrome versions (136+) when used with proxies due to
+# TLS fingerprint / User-Agent mismatches, so pin Chrome 120, which is known to work.
+# The User-Agent must match the impersonation target to avoid WAF detection.
+TIKTOK_IMPERSONATE_TARGET = "chrome120"
+TIKTOK_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 
 from .exceptions import (
     TikTokDeletedError,
@@ -179,112 +191,76 @@ class TikTokClient:
     _aiohttp_connector: Optional[TCPConnector] = None
     _connector_lock = threading.Lock()
 
-    # curl_cffi session for browser-impersonated media downloads
-    _curl_session: Optional[CurlAsyncSession] = None
+    # curl_cffi session pool for browser-impersonated media downloads
+    # Keyed by proxy URL (None for direct connection) to avoid proxy contamination
+    _curl_session_pool: dict[Optional[str], CurlAsyncSession] = {}
     _curl_session_lock = threading.Lock()
     _impersonate_target: Optional[str] = None
 
     @classmethod
     def _get_impersonate_target(cls) -> str:
-        """Get the best impersonation target from yt-dlp's BROWSER_TARGETS.
+        """Get the impersonation target for TikTok requests.
 
-        Uses the same priority as yt-dlp:
-        1. Prioritize desktop over mobile (non-ios, non-android)
-        2. Prioritize Chrome > Safari > Firefox > Edge > Tor
-        3. Prioritize newest version
-
-        This ensures the impersonation target updates automatically when you
-        update yt-dlp, without any hardcoded values.
+        TikTok's WAF blocks newer Chrome versions (136+) when used with proxies
+        due to TLS fingerprint / User-Agent mismatches. Chrome 120 is known to
+        work reliably with proxies.
 
         Returns:
-            curl_cffi-compatible impersonate string (e.g., "chrome136")
+            curl_cffi-compatible impersonate string (e.g., "chrome120")
         """
-        import itertools
-
-        # Get curl_cffi version as tuple for comparison
-        try:
-            curl_cffi_version = tuple(
-                int(x) for x in curl_cffi.__version__.split(".")[:2]
-            )
-        except (ValueError, AttributeError):
-            curl_cffi_version = (0, 9)  # Minimum supported version
-
-        # Collect all available targets for our curl_cffi version
-        available_targets: dict[str, Any] = {}
-        for version, targets in BROWSER_TARGETS.items():
-            if curl_cffi_version >= version:
-                available_targets.update(targets)
-
-        if not available_targets:
-            # Fallback to a common target if BROWSER_TARGETS is empty
-            logger.warning(
-                "No BROWSER_TARGETS available from yt-dlp, using 'chrome' fallback"
-            )
-            return "chrome"
-
-        # Sort by yt-dlp's priority (same logic as _curlcffi.py)
-        # This ensures we pick the same target yt-dlp would use
-        sorted_targets = sorted(
-            available_targets.items(),
-            key=lambda x: (
-                # deprioritize mobile targets since they give very different behavior
-                x[1].os not in ("ios", "android"),
-                # prioritize tor < edge < firefox < safari < chrome
-                ("tor", "edge", "firefox", "safari", "chrome").index(x[1].client)
-                if x[1].client in ("tor", "edge", "firefox", "safari", "chrome")
-                else -1,
-                # prioritize newest version
-                float(x[1].version) if x[1].version else 0,
-                # group by os name
-                x[1].os or "",
-            ),
-            reverse=True,
-        )
-
-        # Get the best target name
-        best_name = sorted_targets[0][0]
-
-        # Apply compatibility lookup for older curl_cffi versions
-        if curl_cffi_version < (0, 11):
-            best_name = _TARGETS_COMPAT_LOOKUP.get(best_name, best_name)
-
+        # Fixed Chrome 120 target that works with TikTok's WAF.
+        # TIKTOK_USER_AGENT must be kept in sync with TIKTOK_IMPERSONATE_TARGET.
         logger.debug(
-            f"Selected impersonation target: {best_name} "
+            f"Using impersonation target: {TIKTOK_IMPERSONATE_TARGET} "
             f"(curl_cffi {curl_cffi.__version__})"
         )
-        return best_name
+        return TIKTOK_IMPERSONATE_TARGET
 
     @classmethod
-    def _get_curl_session(cls) -> CurlAsyncSession:
-        """Get or create shared curl_cffi AsyncSession with browser impersonation.
+    def _get_curl_session(cls, proxy: Optional[str] = None) -> CurlAsyncSession:
+        """Get or create curl_cffi AsyncSession for a specific proxy.
+
+        Sessions are pooled by proxy URL to avoid proxy contamination.
+        Each session receives its proxy through the constructor, so a separate
+        session is kept for every distinct proxy (None = direct connection).
 
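-        The session uses yt-dlp's BROWSER_TARGETS to select the best impersonation
-        target, ensuring TLS fingerprint matches a real browser.
+        The session impersonates the fixed target from _get_impersonate_target(),
+        ensuring the TLS fingerprint matches a real browser.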
+
+        Args:
+            proxy: Proxy URL string, or None for direct connection.
+
+        Returns:
+            CurlAsyncSession configured with the specified proxy.
         """
         with cls._curl_session_lock:
-            # Check if session needs to be created
-            # Note: CurlAsyncSession doesn't have is_closed, we track via _curl_session being None
-            if cls._curl_session is None:
-                pool_size = 10000  # High value for maximum throughput
-                cls._impersonate_target = cls._get_impersonate_target()
-                cls._curl_session = CurlAsyncSession(
+            # Check if session exists for this proxy
+            if proxy not in cls._curl_session_pool:
+                pool_size = 1000  # Per-proxy pool size
+                if cls._impersonate_target is None:
+                    cls._impersonate_target = cls._get_impersonate_target()
+
+                # Create session with proxy baked in at construction time
+                cls._curl_session_pool[proxy] = CurlAsyncSession(
                     impersonate=cls._impersonate_target,
+                    proxy=proxy,  # curl_cffi converts this to {"all": proxy}
                     max_clients=pool_size,
                 )
                 logger.info(
-                    f"Created curl_cffi session with impersonate={cls._impersonate_target}, "
-                    f"max_clients={pool_size}"
+                    f"Created curl_cffi session for proxy={_strip_proxy_auth(proxy)}, "
+                    f"impersonate={cls._impersonate_target}, max_clients={pool_size}"
                 )
-            return cls._curl_session
+            return cls._curl_session_pool[proxy]
 
     @classmethod
     async def close_curl_session(cls) -> None:
-        """Close shared curl_cffi session. Call on application shutdown."""
+        """Close all curl_cffi sessions in the pool. Call on application shutdown."""
         with cls._curl_session_lock:
-            session = cls._curl_session
-            cls._curl_session = None
+            sessions = list(cls._curl_session_pool.values())
+            cls._curl_session_pool.clear()
             cls._impersonate_target = None
-        if session is not None:
+
+        for session in sessions:
             try:
                 await session.close()
             except Exception as e:
@@ -382,10 +358,11 @@ def _get_proxy_info(self) -> str:
         return "None"
 
     def _get_bypass_headers(self, referer_url: str) -> dict[str, str]:
-        """Get bypass headers dynamically from yt-dlp.
+        """Get bypass headers for TikTok media downloads.
 
-        Uses yt-dlp's standard headers which are updated with each yt-dlp release.
-        We add Origin and Referer for CORS compliance with TikTok CDN.
+        Uses headers matching our impersonation target (Chrome 120) to avoid
+        TikTok WAF detection. The User-Agent must match the curl_cffi
+        impersonation target.
 
         Args:
             referer_url: The referer URL to set in headers
@@ -394,6 +371,8 @@ def _get_bypass_headers(self, referer_url: str) -> dict[str, str]:
             Dict of headers for media download
         """
         headers = dict(YTDLP_STD_HEADERS)  # Copy to avoid mutation
+        # Override User-Agent to match our impersonation target
+        headers["User-Agent"] = TIKTOK_USER_AGENT
         headers["Referer"] = referer_url
         headers["Origin"] = "https://www.tiktok.com"
         headers["Accept"] = "*/*"
@@ -487,17 +466,21 @@ async def _download_media_async(
         if not self.data_only_proxy:
             proxy = download_context.get("proxy")
 
-        session = self._get_curl_session()
+        # Get session with proxy baked in (avoids proxy contamination between requests)
+        session = self._get_curl_session(proxy=proxy)
 
         for attempt in range(1, max_retries + 1):
-            logger.debug(f"CDN download attempt {attempt}/{max_retries} for media URL")
+            logger.debug(
+                f"CDN download attempt {attempt}/{max_retries} for media URL "
+                f"via {_strip_proxy_auth(proxy)}"
+            )
             response = None
             try:
+                # Note: proxy is already configured in the session (baked in at creation)
                 response = await session.get(
                     media_url,
                     headers=headers,
                     cookies=cookies,
-                    proxy=proxy,
                     timeout=60,
                     allow_redirects=True,
                     stream=use_streaming,
@@ -770,6 +753,13 @@ def _get_ydl_opts(
             "no_warnings": True,
         }
 
+        # Set impersonation target and matching User-Agent to avoid TikTok WAF detection.
+        # TikTok blocks newer Chrome versions (136+) when used with proxies due to
+        # TLS fingerprint mismatches. Chrome 120 is known to work reliably.
+        if ImpersonateTarget is not None:
+            opts["impersonate"] = ImpersonateTarget("chrome", "120", "macos", None)
+        opts["http_headers"] = {"User-Agent": TIKTOK_USER_AGENT}
+
         # Use explicit proxy decision if it was provided (even if None = direct connection)
         if explicit_proxy is not ...:
             if explicit_proxy is not None:
@@ -848,62 +838,27 @@ def _extract_with_context_sync(
             try:
                 # Use yt-dlp's internal method to get raw webpage data
                 # This also sets up all necessary cookies
-                # NOTE: TikTok's impersonate feature doesn't work through HTTP proxies.
-                # Always use direct connection for extraction, proxy is used for downloads.
-                saved_proxy = None  # Will store proxy for download context
-                if self.proxy_manager and self.proxy_manager.has_proxies():
-                    # Download webpage without proxy but with impersonate
-                    # Save current proxy setting and temporarily disable it
-                    saved_proxy = ydl_opts.get("proxy")
-                    if "proxy" in ydl_opts:
-                        del ydl_opts["proxy"]
-                    # Recreate YDL without proxy for extraction
-                    # Create new instance first to ensure we have a valid ydl
-                    # even if something goes wrong during recreation
-                    old_ydl = ydl
-                    ydl = yt_dlp.YoutubeDL(ydl_opts)
-                    old_ydl.close()  # Close old instance after new one is ready
-                    ie = ydl.get_info_extractor("TikTok")
-                    ie.set_downloader(ydl)
-
-                    # Use standard extraction with impersonate (no proxy)
-                    video_data, status = ie._extract_web_data_and_status(
-                        normalized_url, video_id
-                    )
-
-                    # Check TikTok status codes for errors
-                    # 10204 = Video not found / deleted
-                    # 10216 = Video under review
-                    # 10222 = Private video
-                    if status == 10204:
-                        return None, "deleted", None
-                    elif status == 10222:
-                        return None, "private", None
-                    elif status == 10216:
-                        return None, "deleted", None  # Treat under review as deleted
-
-                    # Validate that we got video data
-                    if not video_data:
-                        logger.error(f"No video data returned for {video_id} (status={status})")
-                        return None, "extraction", None
-                else:
-                    # No proxy, use the standard method with impersonate
-                    video_data, status = ie._extract_web_data_and_status(
-                        normalized_url, video_id
-                    )
+                # NOTE: Always use proxy for extraction if configured, as datacenter
+                # IPs are typically blocked by TikTok.
+                video_data, status = ie._extract_web_data_and_status(
+                    normalized_url, video_id
+                )
 
-                    # Check TikTok status codes for errors (same as proxy path)
-                    if status == 10204:
-                        return None, "deleted", None
-                    elif status == 10222:
-                        return None, "private", None
-                    elif status == 10216:
-                        return None, "deleted", None  # Treat under review as deleted
-
-                    # Validate that we got video data
-                    if not video_data:
-                        logger.error(f"No video data returned for {video_id} (status={status})")
-                        return None, "extraction", None
+                # Check TikTok status codes for errors
+                # 10204 = Video not found / deleted
+                # 10216 = Video under review
+                # 10222 = Private video
+                if status == 10204:
+                    return None, "deleted", None
+                elif status == 10222:
+                    return None, "private", None
+                elif status == 10216:
+                    return None, "deleted", None  # Treat under review as deleted
+
+                # Validate that we got video data
+                if not video_data:
+                    logger.error(f"No video data returned for {video_id} (status={status})")
+                    return None, "extraction", None
             except AttributeError as e:
                 logger.error(
                     f"Failed to call yt-dlp internal method: {e}. "
@@ -915,18 +870,12 @@ def _extract_with_context_sync(
                 ) from e
 
             # Create download context with the live instances
-            # For proxy path, use the saved_proxy (extraction was without proxy, downloads use proxy)
-            # For non-proxy path, use request_proxy as before
-            context_proxy = (
-                saved_proxy
-                if self.proxy_manager and self.proxy_manager.has_proxies()
-                else request_proxy
-            )
+            # Use the same proxy for downloads that was used for extraction
             download_context = {
                 "ydl": ydl,
                 "ie": ie,
                 "referer_url": url,
-                "proxy": context_proxy,  # Store proxy for per-request assignment
+                "proxy": request_proxy if request_proxy is not ... else None,
             }
 
             # Success - transfer ownership of ydl to caller via download_context
@@ -1458,15 +1407,16 @@ async def detect_image_format(self, image_url: str, video_info: VideoInfo) -> st
         if not self.data_only_proxy:
             proxy = video_info._download_context.get("proxy")
 
-        session = self._get_curl_session()
+        # Get session with proxy baked in (avoids proxy contamination between requests)
+        session = self._get_curl_session(proxy=proxy)
 
         response = None
         try:
+            # Note: proxy is already configured in the session (baked in at creation)
             response = await session.get(
                 image_url,
                 headers=headers,
                 cookies=cookies,
-                proxy=proxy,
                 timeout=10,
                 allow_redirects=True,
             )