diff --git a/src/intelstream/adapters/strategies/llm_extraction.py b/src/intelstream/adapters/strategies/llm_extraction.py index 1901b73..4fecc25 100644 --- a/src/intelstream/adapters/strategies/llm_extraction.py +++ b/src/intelstream/adapters/strategies/llm_extraction.py @@ -80,14 +80,8 @@ async def discover( if isinstance(posts_data, list): posts = [] for p in posts_data: - if ( - isinstance(p, dict) - and isinstance(p.get("url"), str) - and p.get("url") - ): - posts.append( - DiscoveredPost(url=p["url"], title=p.get("title", "")) - ) + if isinstance(p, dict) and isinstance(p.get("url"), str) and p.get("url"): + posts.append(DiscoveredPost(url=p["url"], title=p.get("title", ""))) if posts: logger.debug( "Using cached LLM extraction", @@ -131,12 +125,7 @@ def _get_content_hash(self, html: str) -> str: ): tag.decompose() - main = ( - soup.find("main") - or soup.find("article") - or soup.find(id="content") - or soup.body - ) + main = soup.find("main") or soup.find("article") or soup.find(id="content") or soup.body if main: text = " ".join(main.get_text().split()) @@ -150,16 +139,10 @@ async def _fetch_html(self, url: str) -> str | None: } try: if self._http_client: - response = await self._http_client.get( - url, headers=headers, follow_redirects=True - ) + response = await self._http_client.get(url, headers=headers, follow_redirects=True) else: - async with httpx.AsyncClient( - timeout=get_settings().http_timeout_seconds - ) as client: - response = await client.get( - url, headers=headers, follow_redirects=True - ) + async with httpx.AsyncClient(timeout=get_settings().http_timeout_seconds) as client: + response = await client.get(url, headers=headers, follow_redirects=True) response.raise_for_status() return response.text except httpx.HTTPError as e: @@ -169,9 +152,7 @@ async def _fetch_html(self, url: str) -> str | None: def _clean_html(self, html: str) -> str: soup = BeautifulSoup(html, "lxml") - for tag in soup.find_all( - ["script", "style", "noscript", "svg", "path", "iframe"] - ): + for tag in soup.find_all(["script", "style", "noscript", "svg", "path", "iframe"]): tag.decompose() for tag in soup.find_all(True): @@ -286,7 +267,5 @@ def parse_and_validate(data: str) -> list[dict[str, str]] | None: if result is not None: return result - logger.warning( - "Failed to extract JSON from LLM response", response_preview=text[:200] - ) + logger.warning("Failed to extract JSON from LLM response", response_preview=text[:200]) return [] diff --git a/src/intelstream/config.py b/src/intelstream/config.py index 49f3d8a..79f19d9 100644 --- a/src/intelstream/config.py +++ b/src/intelstream/config.py @@ -34,20 +34,12 @@ class Settings(BaseSettings): description="LLM provider for summarization: anthropic, openai, gemini, or kimi", ) - anthropic_api_key: str | None = Field( - default=None, description="Anthropic API key for Claude" - ) + anthropic_api_key: str | None = Field(default=None, description="Anthropic API key for Claude") openai_api_key: str | None = Field(default=None, description="OpenAI API key") - gemini_api_key: str | None = Field( - default=None, description="Google Gemini API key" - ) - kimi_api_key: str | None = Field( - default=None, description="Kimi (Moonshot AI) API key" - ) + gemini_api_key: str | None = Field(default=None, description="Google Gemini API key") + kimi_api_key: str | None = Field(default=None, description="Kimi (Moonshot AI) API key") - youtube_api_key: str | None = Field( - default=None, description="YouTube Data API key (optional)" - ) + youtube_api_key: str | None = Field(default=None, description="YouTube Data API key (optional)") twitter_bearer_token: str | None = Field( default=None, diff --git a/src/intelstream/discord/cogs/search.py b/src/intelstream/discord/cogs/search.py index bf0ec8f..342b48a 100644 --- a/src/intelstream/discord/cogs/search.py +++ b/src/intelstream/discord/cogs/search.py @@ -72,12 +72,11 @@ async def search(self, interaction: discord.Interaction, query: str) -> None: title = _truncate(item.title, 100) preview = _truncate(item.summary or "", MAX_SUMMARY_PREVIEW) - score_pct = f"{result.score * 100:.0f}%" value_parts = [] if item.original_url: value_parts.append(f"[Link]({item.original_url})") - value_parts.append(f"Relevance: {score_pct}") + value_parts.append(f"Similarity score: {result.score:.2f}") if preview: value_parts.append(preview) diff --git a/src/intelstream/services/page_analyzer.py b/src/intelstream/services/page_analyzer.py index 2fc5b8c..cd36349 100644 --- a/src/intelstream/services/page_analyzer.py +++ b/src/intelstream/services/page_analyzer.py @@ -121,9 +121,7 @@ async def analyze(self, url: str) -> ExtractionProfile: validation_result = self._validate_profile(html, profile) if not validation_result["valid"]: - raise PageAnalysisError( - f"Profile validation failed: {validation_result['reason']}" - ) + raise PageAnalysisError(f"Profile validation failed: {validation_result['reason']}") logger.info( "Page analysis complete", @@ -141,24 +139,16 @@ async def _fetch_html(self, url: str) -> str: try: if self._http_client: - response = await self._http_client.get( - url, headers=headers, follow_redirects=True - ) + response = await self._http_client.get(url, headers=headers, follow_redirects=True) else: - async with httpx.AsyncClient( - timeout=get_settings().http_timeout_seconds - ) as client: - response = await client.get( - url, headers=headers, follow_redirects=True - ) + async with httpx.AsyncClient(timeout=get_settings().http_timeout_seconds) as client: + response = await client.get(url, headers=headers, follow_redirects=True) response.raise_for_status() return response.text except httpx.HTTPStatusError as e: - raise PageAnalysisError( - f"Failed to fetch page: HTTP {e.response.status_code}" - ) from e + raise PageAnalysisError(f"Failed to fetch page: HTTP {e.response.status_code}") from e except httpx.RequestError as e: raise PageAnalysisError(f"Failed to fetch page: {e}") from e @@ -255,9 +245,7 @@ async def _extract_profile_with_llm(self, url: str, html: str) -> dict[str, Any] logger.error("Anthropic API error during page analysis", error=str(e)) raise PageAnalysisError(f"LLM API error: {e}") from e - def _validate_profile( - self, html: str, profile: ExtractionProfile - ) -> dict[str, Any]: + def _validate_profile(self, html: str, profile: ExtractionProfile) -> dict[str, Any]: soup = BeautifulSoup(html, "lxml") try: diff --git a/tests/test_config.py b/tests/test_config.py index 045af15..38c61ef 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -28,9 +28,7 @@ def test_settings_from_env(self, monkeypatch: pytest.MonkeyPatch) -> None: assert settings.default_poll_interval_minutes == 5 assert settings.log_level == "INFO" - def test_settings_with_optional_youtube( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_settings_with_optional_youtube(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_CHANNEL_ID", "987654321") @@ -42,9 +40,7 @@ def test_settings_with_optional_youtube( assert settings.youtube_api_key == "yt-api-key" - def test_settings_poll_interval_bounds( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_settings_poll_interval_bounds(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_CHANNEL_ID", "987654321") @@ -103,9 +99,7 @@ def test_repr_handles_none_keys(self, monkeypatch: pytest.MonkeyPatch) -> None: assert "youtube_api_key=None" in repr_str assert "openai_api_key=None" in repr_str - def test_empty_discord_bot_token_rejected( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_empty_discord_bot_token_rejected(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_OWNER_ID", "111222333") @@ -137,9 +131,7 @@ def test_llm_api_key_returns_correct_provider_key( settings = Settings(_env_file=None) assert settings.llm_api_key == "sk-openai-test" - def test_llm_api_key_raises_when_key_missing( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_llm_api_key_raises_when_key_missing(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_OWNER_ID", "111222333") @@ -149,9 +141,7 @@ def test_llm_api_key_raises_when_key_missing( with pytest.raises(ValidationError, match="No API key configured"): Settings(_env_file=None) - def test_invalid_llm_provider_rejected( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_invalid_llm_provider_rejected(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_OWNER_ID", "111222333") @@ -161,9 +151,7 @@ def test_invalid_llm_provider_rejected( with pytest.raises(ValidationError): Settings(_env_file=None) - def test_valid_llm_providers_accepted( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_valid_llm_providers_accepted(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_OWNER_ID", "111222333") @@ -181,9 +169,7 @@ def test_valid_llm_providers_accepted( assert settings.llm_api_key == key_val monkeypatch.delenv(key_env, raising=False) - def test_missing_api_key_fails_at_construction( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_missing_api_key_fails_at_construction(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DISCORD_BOT_TOKEN", "test_token") monkeypatch.setenv("DISCORD_GUILD_ID", "123456789") monkeypatch.setenv("DISCORD_OWNER_ID", "111222333") @@ -261,9 +247,7 @@ def test_explicit_model_overrides_provider_default( assert settings.summary_model == "my-custom-model" assert settings.summary_model_interactive == "my-custom-interactive" - def test_partial_override_uses_default_for_unset( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_partial_override_uses_default_for_unset(self, monkeypatch: pytest.MonkeyPatch) -> None: self._base_env(monkeypatch) monkeypatch.setenv("LLM_PROVIDER", "openai") monkeypatch.setenv("OPENAI_API_KEY", "sk-openai-test") @@ -301,9 +285,7 @@ def test_falls_back_to_default(self, monkeypatch: pytest.MonkeyPatch) -> None: assert settings.get_poll_interval(SourceType.YOUTUBE) == 10 assert settings.get_poll_interval(SourceType.RSS) == 10 - def test_type_specific_overrides_default( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_type_specific_overrides_default(self, monkeypatch: pytest.MonkeyPatch) -> None: self._base_env(monkeypatch) monkeypatch.setenv("DEFAULT_POLL_INTERVAL_MINUTES", "5") monkeypatch.setenv("TWITTER_POLL_INTERVAL_MINUTES", "20") @@ -329,9 +311,7 @@ def test_returns_parent_directory_for_sqlite_file(self) -> None: assert result == Path("./data") def test_returns_parent_for_absolute_path(self) -> None: - result = get_database_directory( - "sqlite+aiosqlite:////home/user/data/intelstream.db" - ) + result = get_database_directory("sqlite+aiosqlite:////home/user/data/intelstream.db") assert result == Path("/home/user/data") def test_returns_none_for_memory_database(self) -> None: diff --git a/tests/test_discord/test_channel_summary.py b/tests/test_discord/test_channel_summary.py index 481351f..82453f0 100644 --- a/tests/test_discord/test_channel_summary.py +++ b/tests/test_discord/test_channel_summary.py @@ -170,9 +170,7 @@ async def test_summary_with_different_channel(self, cog, mock_interaction): messages = [_make_message(f"msg {i}", f"user{i}") for i in range(6)] target_channel.history = MagicMock(return_value=_async_iter(messages)) - await cog.summary.callback( - cog, mock_interaction, count=200, channel=target_channel - ) + await cog.summary.callback(cog, mock_interaction, count=200, channel=target_channel) target_channel.history.assert_called_once() sent_text = mock_interaction.followup.send.call_args.args[0] @@ -183,19 +181,14 @@ async def test_summary_handles_summarization_failure(self, cog, mock_interaction channel = mock_interaction.channel channel.history = MagicMock(return_value=_async_iter(messages)) - cog._summarizer.summarize_chat = AsyncMock( - side_effect=SummarizationError("API error") - ) + cog._summarizer.summarize_chat = AsyncMock(side_effect=SummarizationError("API error")) await cog.summary.callback(cog, mock_interaction, count=200, channel=None) mock_interaction.followup.send.assert_called_once() sent_kwargs = mock_interaction.followup.send.call_args.kwargs assert sent_kwargs.get("ephemeral") is True - assert ( - "Failed to generate summary" - in mock_interaction.followup.send.call_args.args[0] - ) + assert "Failed to generate summary" in mock_interaction.followup.send.call_args.args[0] async def test_summary_filters_empty_messages(self, cog, mock_interaction): messages = [ diff --git a/tests/test_discord/test_content_posting.py b/tests/test_discord/test_content_posting.py index 2bdc89c..f2eb8fd 100644 --- a/tests/test_discord/test_content_posting.py +++ b/tests/test_discord/test_content_posting.py @@ -159,9 +159,7 @@ async def test_cog_unload_closes_summarizer(self, _patch_cog_deps, mock_bot): class TestContentLoop: - async def test_content_loop_skips_when_not_initialized( - self, _patch_cog_deps, mock_bot - ): + async def test_content_loop_skips_when_not_initialized(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) cog._initialized = False @@ -202,9 +200,7 @@ async def test_content_loop_posts_to_all_guilds(self, _patch_cog_deps, mock_bot) deps["poster"].post_unposted_items.assert_any_call(111) deps["poster"].post_unposted_items.assert_any_call(222) - async def test_content_loop_notifies_owner_on_error( - self, _patch_cog_deps, mock_bot - ): + async def test_content_loop_notifies_owner_on_error(self, _patch_cog_deps, mock_bot): deps = _patch_cog_deps deps["pipeline"].run_cycle = AsyncMock(side_effect=Exception("Test error")) @@ -217,14 +213,10 @@ async def test_content_loop_notifies_owner_on_error( call_args = mock_bot.notify_owner.call_args[0][0] assert "Test error" in call_args - async def test_content_loop_continues_on_guild_error( - self, _patch_cog_deps, mock_bot - ): + async def test_content_loop_continues_on_guild_error(self, _patch_cog_deps, mock_bot): deps = _patch_cog_deps deps["pipeline"].run_cycle = AsyncMock(return_value=(5, 3)) - deps["poster"].post_unposted_items = AsyncMock( - side_effect=[Exception("Guild 1 error"), 2] - ) + deps["poster"].post_unposted_items = AsyncMock(side_effect=[Exception("Guild 1 error"), 2]) guild1 = MagicMock(spec=discord.Guild) guild1.id = 111 @@ -245,9 +237,7 @@ async def test_content_loop_continues_on_guild_error( class TestContentLoopErrorHandler: - async def test_error_handler_notifies_owner_on_first_error( - self, _patch_cog_deps, mock_bot - ): + async def test_error_handler_notifies_owner_on_first_error(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) await cog.cog_load() @@ -272,9 +262,7 @@ async def test_error_handler_does_not_notify_owner_on_subsequent_errors( class TestContentLoopBackoff: - async def test_backoff_increments_consecutive_failures( - self, _patch_cog_deps, mock_bot - ): + async def test_backoff_increments_consecutive_failures(self, _patch_cog_deps, mock_bot): deps = _patch_cog_deps deps["pipeline"].run_cycle = AsyncMock(side_effect=Exception("Test error")) @@ -299,9 +287,7 @@ async def test_backoff_resets_on_success(self, _patch_cog_deps, mock_bot): assert cog._consecutive_failures == 0 - async def test_circuit_breaker_notifies_and_retries_hourly( - self, _patch_cog_deps, mock_bot - ): + async def test_circuit_breaker_notifies_and_retries_hourly(self, _patch_cog_deps, mock_bot): deps = _patch_cog_deps deps["pipeline"].run_cycle = AsyncMock(side_effect=Exception("Still failing")) @@ -330,9 +316,7 @@ async def test_circuit_breaker_recovers_on_success(self, _patch_cog_deps, mock_b assert cog._consecutive_failures == 0 assert cog.content_loop.minutes == cog._base_interval - async def test_apply_backoff_keeps_base_on_first_failure( - self, _patch_cog_deps, mock_bot - ): + async def test_apply_backoff_keeps_base_on_first_failure(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) await cog.cog_load() cog._consecutive_failures = 1 @@ -341,9 +325,7 @@ async def test_apply_backoff_keeps_base_on_first_failure( assert cog.content_loop.minutes == cog._base_interval - async def test_apply_backoff_doubles_on_second_failure( - self, _patch_cog_deps, mock_bot - ): + async def test_apply_backoff_doubles_on_second_failure(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) await cog.cog_load() cog._consecutive_failures = 2 @@ -352,9 +334,7 @@ async def test_apply_backoff_doubles_on_second_failure( assert cog.content_loop.minutes == cog._base_interval * 2 - async def test_apply_backoff_caps_at_max_multiplier( - self, _patch_cog_deps, mock_bot - ): + async def test_apply_backoff_caps_at_max_multiplier(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) await cog.cog_load() cog._consecutive_failures = 4 @@ -364,9 +344,7 @@ async def test_apply_backoff_caps_at_max_multiplier( max_interval = cog._base_interval * ContentPosting.MAX_BACKOFF_MULTIPLIER assert cog.content_loop.minutes == max_interval - async def test_reset_backoff_restores_base_interval( - self, _patch_cog_deps, mock_bot - ): + async def test_reset_backoff_restores_base_interval(self, _patch_cog_deps, mock_bot): cog = ContentPosting(mock_bot) await cog.cog_load() cog._consecutive_failures = 3 @@ -377,9 +355,7 @@ async def test_reset_backoff_restores_base_interval( assert cog._consecutive_failures == 0 assert cog.content_loop.minutes == cog._base_interval - async def test_only_notifies_owner_on_first_failure( - self, _patch_cog_deps, mock_bot - ): + async def test_only_notifies_owner_on_first_failure(self, _patch_cog_deps, mock_bot): deps = _patch_cog_deps deps["pipeline"].run_cycle = AsyncMock(side_effect=Exception("Test error")) diff --git a/tests/test_discord/test_search.py b/tests/test_discord/test_search.py index c64219a..fff4169 100644 --- a/tests/test_discord/test_search.py +++ b/tests/test_discord/test_search.py @@ -90,6 +90,8 @@ async def test_search_with_results( embed = call_kwargs.kwargs.get("embed") assert embed is not None assert len(embed.fields) == 2 + assert "Similarity score: 0.95" in embed.fields[0].value + assert "Relevance:" not in embed.fields[0].value async def test_search_embeds_query(self, search_cog, mock_interaction, mock_embedding_service): await search_cog.search.callback(search_cog, mock_interaction, "test query") diff --git a/tests/test_discord/test_summarize.py b/tests/test_discord/test_summarize.py index 2b19d70..213c62a 100644 --- a/tests/test_discord/test_summarize.py +++ b/tests/test_discord/test_summarize.py @@ -45,42 +45,23 @@ def mock_interaction(): class TestDetectUrlType: def test_detect_youtube_com(self, summarize_cog): - assert ( - summarize_cog.detect_url_type("https://www.youtube.com/watch?v=abc123") - == "youtube" - ) - assert ( - summarize_cog.detect_url_type("https://youtube.com/watch?v=abc123") - == "youtube" - ) + assert summarize_cog.detect_url_type("https://www.youtube.com/watch?v=abc123") == "youtube" + assert summarize_cog.detect_url_type("https://youtube.com/watch?v=abc123") == "youtube" def test_detect_youtu_be(self, summarize_cog): assert summarize_cog.detect_url_type("https://youtu.be/abc123") == "youtube" def test_detect_substack(self, summarize_cog): - assert ( - summarize_cog.detect_url_type("https://example.substack.com/p/article") - == "substack" - ) - assert ( - summarize_cog.detect_url_type("https://newsletter.substack.com/p/post") - == "substack" - ) + assert summarize_cog.detect_url_type("https://example.substack.com/p/article") == "substack" + assert summarize_cog.detect_url_type("https://newsletter.substack.com/p/post") == "substack" def test_detect_twitter(self, summarize_cog): - assert ( - summarize_cog.detect_url_type("https://twitter.com/user/status/123") - == "twitter" - ) - assert ( - summarize_cog.detect_url_type("https://x.com/user/status/123") == "twitter" - ) + assert summarize_cog.detect_url_type("https://twitter.com/user/status/123") == "twitter" + assert summarize_cog.detect_url_type("https://x.com/user/status/123") == "twitter" def test_detect_generic_web(self, summarize_cog): assert summarize_cog.detect_url_type("https://example.com/article") == "web" - assert ( - summarize_cog.detect_url_type("https://nytimes.com/2024/article") == "web" - ) + assert summarize_cog.detect_url_type("https://nytimes.com/2024/article") == "web" assert summarize_cog.detect_url_type("https://blog.example.org/post") == "web" @@ -222,9 +203,7 @@ def test_sets_image_when_thumbnail_provided(self, summarize_cog): class TestSummarizeCommand: async def test_rejects_invalid_url(self, summarize_cog, mock_interaction): - await summarize_cog.summarize.callback( - summarize_cog, mock_interaction, "not-a-url" - ) + await summarize_cog.summarize.callback(summarize_cog, mock_interaction, "not-a-url") mock_interaction.followup.send.assert_called_once() call_args = mock_interaction.followup.send.call_args @@ -372,9 +351,7 @@ async def test_handles_summarization_error(self, summarize_cog, mock_interaction content="This is enough content for summarization. " * 10, ) - summarize_cog._summarizer.summarize = AsyncMock( - side_effect=Exception("API Error") - ) + summarize_cog._summarizer.summarize = AsyncMock(side_effect=Exception("API Error")) with patch.object( summarize_cog, "_fetch_web_content", AsyncMock(return_value=mock_content) @@ -414,9 +391,7 @@ async def test_cog_unload_closes_http_client(self, mock_bot): class TestSummarizeCooldown: - async def test_cooldown_error_sends_retry_message( - self, summarize_cog, mock_interaction - ): + async def test_cooldown_error_sends_retry_message(self, summarize_cog, mock_interaction): from discord import app_commands mock_interaction.response.send_message = AsyncMock() @@ -448,9 +423,7 @@ async def test_cooldown_error_shows_seconds_only_for_short_wait( assert "45s" in call_args[0][0] assert "m " not in call_args[0][0] - async def test_non_cooldown_error_is_reraised( - self, summarize_cog, mock_interaction - ): + async def test_non_cooldown_error_is_reraised(self, summarize_cog, mock_interaction): from discord import app_commands error = app_commands.MissingPermissions(["manage_guild"])