From 370db08538bcdca86e31b475f9313730469a47c5 Mon Sep 17 00:00:00 2001 From: Rishabh Shah Date: Tue, 18 Feb 2025 15:33:35 -0500 Subject: [PATCH] Extracting Keywords using Yake --- backend/microservices/summarization_service.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/backend/microservices/summarization_service.py b/backend/microservices/summarization_service.py index 1495c27..dd4be2b 100755 --- a/backend/microservices/summarization_service.py +++ b/backend/microservices/summarization_service.py @@ -9,6 +9,7 @@ import openai from backend.core.config import Config from backend.core.utils import setup_logger, log_exception +import yake # Initialize logger logger = setup_logger(__name__) @@ -74,6 +75,13 @@ def run_summarization(text): logger.error(f"Error in summarization: {str(e)}") return "Error generating summary" +@log_exception(logger) +def get_keywords(text,num_keywords=1): + kw_extractor = yake.KeywordExtractor(top=num_keywords, lan='en') + keywords = kw_extractor.extract_keywords(text) + return [kw[0] for kw in keywords] + + @log_exception(logger) def process_articles(session_id=None): try: @@ -105,7 +113,8 @@ def process_articles(session_id=None): 'url': article['url'], 'urlToImage': article.get('urlToImage'), 'content': article.get('content', ''), - 'summary': summary + 'summary': summary, + 'filter_keywords': get_keywords(article.get('content', '')) }) # Save summarized articles to configured path with session_id