From 140f3b8ababe95841d237f85f76c6c8da4e4a09b Mon Sep 17 00:00:00 2001 From: Akalpit Dawkhar Date: Mon, 24 Feb 2025 01:39:29 -0500 Subject: [PATCH 01/12] Added storytracking --- backend/api_gateway/api_gateway.py | 49 ++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index 5b69660..56bfdd8 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -3,15 +3,16 @@ This Flask application aggregates endpoints from various microservices. """ -from flask import Flask, jsonify, request, make_response +from flask import Blueprint, Flask, jsonify, request, make_response from flask_cors import CORS -from flask_restx import Api, Resource, fields +from flask_restx import Api, Resource, fields, Namespace import sys import os import jwt import json import uuid import datetime +from datetime import datetime, timedelta from functools import wraps sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) @@ -27,6 +28,7 @@ from backend.core.utils import setup_logger, log_exception from backend.microservices.auth_service import load_users from backend.microservices.news_storage import store_article_in_supabase, log_user_search, add_bookmark, get_user_bookmarks, delete_bookmark + # Initialize logger logger = setup_logger(__name__) @@ -46,6 +48,7 @@ user_ns = api.namespace('api/user', description='User operations') auth_ns = api.namespace('api/auth', description='Authentication operations') bookmark_ns = api.namespace('api/bookmarks', description='Bookmark operations') +story_tracking_ns = api.namespace('api/story_tracking', description='Story tracking operations') def token_required(f): @wraps(f) @@ -348,6 +351,48 @@ def delete(self, bookmark_id): 'status': 'error', 'message': str(e) }, 500 + + + +@story_tracking_ns.route('/') +class StoryTracking(Resource): + @story_tracking_ns.param('keyword', 'Keyword to track for news updates') + def get(self): + """Fetch latest news for a tracked keyword""" + try: + keyword = request.args.get('keyword') + if not keyword: + return make_response(jsonify({ + 'status': 'error', + 'message': 'Keyword parameter is required' + }), 400) + + # Fetch latest articles for the keyword + articles = fetch_news(keyword) + + # Store articles and prepare response + processed_articles = [] + for article in articles: + article_id = store_article_in_supabase(article) + processed_articles.append({ + 'id': article_id, + 'title': article.get('title'), + 'url': article.get('url'), + 'source': article.get('source'), + 'publishedAt': article.get('publishedAt', datetime.now().isoformat()) + }) + + return make_response(jsonify({ + 'status': 'success', + 'articles': processed_articles + }), 200) + + except Exception as e: + logger.error(f"Error in story tracking: {str(e)}") + return make_response(jsonify({ + 'status': 'error', + 'message': str(e) + }), 500) if __name__ == '__main__': port = int(sys.argv[1]) if len(sys.argv) > 1 else Config.API_PORT From 76fab4e247b18040db0e0bcdc8c2eedd1475ac94 Mon Sep 17 00:00:00 2001 From: Rishabh Shah Date: Sun, 2 Mar 2025 02:46:52 -0500 Subject: [PATCH 02/12] Add schema for story tracking feature with RLS policies --- backend/api_gateway/api_gateway.py | 269 ++++++++++++- backend/data/story_tracking_schema.sql | 70 ++++ .../microservices/story_tracking_service.py | 375 +++++++++++++++++- 3 files changed, 698 insertions(+), 16 deletions(-) create mode 100644 backend/data/story_tracking_schema.sql diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index 56bfdd8..c07ee27 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -16,11 +16,12 @@ from functools import wraps sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +print("[DEBUG] [api_gateway] [startup] API Gateway starting up...") + # load env from dotenv import load_dotenv load_dotenv() - - +print("[DEBUG] [api_gateway] [startup] Environment variables loaded") from backend.microservices.summarization_service import run_summarization, process_articles from backend.microservices.news_fetcher import fetch_news @@ -31,15 +32,25 @@ # Initialize logger logger = setup_logger(__name__) +print("[DEBUG] [api_gateway] [startup] Logger initialized") # Initialize Flask app with CORS support app = Flask(__name__) app.config['SECRET_KEY'] = os.getenv('JWT_SECRET_KEY', 'your-secret-key') # Change this in production -CORS(app, origins=Config.CORS_ORIGINS, supports_credentials=True, allow_headers=['Content-Type', 'Authorization', 'Access-Control-Allow-Origin'], methods=['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], expose_headers=['Access-Control-Allow-Origin']) +print("[DEBUG] [api_gateway] [startup] Flask app initialized with secret key") + +# Improved CORS configuration to handle preflight requests properly +CORS(app, + origins=["http://localhost:5173", "http://localhost:5001"], + supports_credentials=True, + allow_headers=["Content-Type", "Authorization"], + methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"]) +print("[DEBUG] [api_gateway] [startup] CORS configured") # Initialize Flask-RestX api = Api(app, version='1.0', title='News Aggregator API', description='A news aggregation and summarization API') +print("[DEBUG] [api_gateway] [startup] Flask-RestX API initialized") # Define namespaces news_ns = api.namespace('api/news', description='News operations') @@ -49,19 +60,25 @@ auth_ns = api.namespace('api/auth', description='Authentication operations') bookmark_ns = api.namespace('api/bookmarks', description='Bookmark operations') story_tracking_ns = api.namespace('api/story_tracking', description='Story tracking operations') +print("[DEBUG] [api_gateway] [startup] API namespaces defined") def token_required(f): @wraps(f) def decorated(*args, **kwargs): + print("[DEBUG] [api_gateway] [token_required] Checking token in request") auth_header = request.headers.get('Authorization') if not auth_header: + print("[DEBUG] [api_gateway] [token_required] Authorization header missing") return {'error': 'Authorization header missing'}, 401 try: token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [token_required] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'],audience='authenticated') + print(f"[DEBUG] [api_gateway] [token_required] Token decoded successfully, user: {payload.get('sub', 'unknown')}") return f(*args, **kwargs) except Exception as e: + print(f"[DEBUG] [api_gateway] [token_required] Token validation error: {str(e)}") return {'error': 'Invalid token', 'message': str(e)}, 401 return decorated @@ -88,11 +105,14 @@ def decorated(*args, **kwargs): 'lastName': fields.String(required=False, description='Last name') }) +print("[DEBUG] [api_gateway] [startup] API models defined") + # Health check endpoint @health_ns.route('/') class HealthCheck(Resource): def get(self): """Check if API Gateway is healthy""" + print("[DEBUG] [api_gateway] [health_check] Called") return {"status": "API Gateway is healthy"}, 200 # Summarization endpoint @@ -101,9 +121,12 @@ class Summarize(Resource): @summarize_ns.expect(article_model) def post(self): """Summarize the given article text""" + print("[DEBUG] [api_gateway] [summarize] Called") data = request.get_json() article_text = data.get('article_text', '') + print(f"[DEBUG] [api_gateway] [summarize] Summarizing text of length: {len(article_text)}") summary = run_summarization(article_text) + print(f"[DEBUG] [api_gateway] [summarize] Summarization complete, summary length: {len(summary)}") return {"summary": summary}, 200 @news_ns.route('/fetch') @@ -119,26 +142,31 @@ def get(self): keyword = request.args.get('keyword', '') user_id = request.args.get('user_id') # optional session_id = request.args.get('session_id') + print(f"[DEBUG] [api_gateway] [news_fetch] Called with keyword: '{keyword}', user_id: {user_id}, session_id: {session_id}") - # Fetch articles from your existing news_fetcher module. + print(f"[DEBUG] [api_gateway] [news_fetch] Fetching news articles for keyword: '{keyword}'") articles = fetch_news(keyword) # This returns a list of articles. + print(f"[DEBUG] [api_gateway] [news_fetch] Found {len(articles) if articles else 0} articles") stored_article_ids = [] for article in articles: - # Store each article in the database; get its unique id. + print(f"[DEBUG] [api_gateway] [news_fetch] Storing article: {article.get('title', 'No title')}") article_id = store_article_in_supabase(article) stored_article_ids.append(article_id) + print(f"[DEBUG] [api_gateway] [news_fetch] Stored article with ID: {article_id}") - # If the request included a user_id, log the search for this article. if user_id: + print(f"[DEBUG] [api_gateway] [news_fetch] Logging search for user {user_id}, article {article_id}") log_user_search(user_id, article_id, session_id) + print(f"[DEBUG] [api_gateway] [news_fetch] Returning {len(stored_article_ids)} article IDs") return make_response(jsonify({ 'status': 'success', 'data': stored_article_ids }), 200) except Exception as e: + print(f"[DEBUG] [api_gateway] [news_fetch] Error: {str(e)}") return make_response(jsonify({ 'status': 'error', 'message': str(e) @@ -153,7 +181,10 @@ def post(self): """Process and summarize articles""" try: session_id = request.args.get('session_id') + print(f"[DEBUG] [api_gateway] [news_process] Called with session_id: {session_id}") + print("[DEBUG] [api_gateway] [news_process] Processing articles...") summarized_articles = process_articles(session_id) + print(f"[DEBUG] [api_gateway] [news_process] Processed {len(summarized_articles) if summarized_articles else 0} articles") return { 'status': 'success', 'message': 'Articles processed and summarized successfully', @@ -161,6 +192,7 @@ def post(self): 'session_id': session_id }, 200 except Exception as e: + print(f"[DEBUG] [api_gateway] [news_process] Error: {str(e)}") logger.error(f"Error processing articles: {str(e)}") return { 'status': 'error', @@ -173,20 +205,25 @@ class Signup(Resource): @auth_ns.expect(signup_model) def post(self): """Register a new user""" + print("[DEBUG] [api_gateway] [signup] User signup endpoint called") data = request.get_json() username = data.get('username') password = data.get('password') email = data.get('email') firstName = data.get('firstName', '') lastName = data.get('lastName', '') + print(f"[DEBUG] [api_gateway] [signup] Request for username: {username}, email: {email}") if not username or not password or not email: + print("[DEBUG] [api_gateway] [signup] Validation failed: missing required fields") return {'error': 'Username, password, and email are required'}, 400 users = load_users() + print(f"[DEBUG] [api_gateway] [signup] Loaded {len(users)} existing users") # Check if username already exists if any(u.get('username') == username for u in users): + print(f"[DEBUG] [api_gateway] [signup] Username {username} already exists") return {'error': 'Username already exists'}, 400 # Create new user with unique ID @@ -198,51 +235,67 @@ def post(self): 'firstName': firstName, 'lastName': lastName } + print(f"[DEBUG] [api_gateway] [signup] Created new user with ID: {new_user['id']}") users.append(new_user) try: # Save updated users list + print("[DEBUG] [api_gateway] [signup] Saving updated users list") with open(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'users.txt'), 'w') as f: json.dump(users, f, indent=4) + print("[DEBUG] [api_gateway] [signup] Users list saved successfully") except Exception as e: + print(f"[DEBUG] [api_gateway] [signup] Error saving user data: {str(e)}") return {'error': 'Failed to save user data', 'message': str(e)}, 500 # Generate JWT token + print("[DEBUG] [api_gateway] [signup] Generating JWT token") token = jwt.encode({ 'id': new_user['id'], 'username': new_user['username'], 'exp': datetime.datetime.utcnow() + datetime.timedelta(hours=1) }, app.config['SECRET_KEY'], algorithm='HS256') + print(f"[DEBUG] [api_gateway] [signup] Token generated: {token[:10]}...") # Exclude password from response user_data = {k: new_user[k] for k in new_user if k != 'password'} + print("[DEBUG] [api_gateway] [signup] Signup successful") return {'message': 'User registered successfully', 'user': user_data, 'token': token}, 201 @auth_ns.route('/login') class Login(Resource): def post(self): """Login and get authentication token""" + print("[DEBUG] [api_gateway] [login] Login endpoint called") data = request.get_json() username = data.get('username') password = data.get('password') + print(f"[DEBUG] [api_gateway] [login] Login attempt for username: {username}") if not username or not password: + print("[DEBUG] [api_gateway] [login] Validation failed: missing username or password") return {'error': 'Username and password are required'}, 400 users = load_users() + print(f"[DEBUG] [api_gateway] [login] Loaded {len(users)} users") user = next((u for u in users if u.get('username') == username and u.get('password') == password), None) if not user: + print(f"[DEBUG] [api_gateway] [login] Invalid credentials for username: {username}") return {'error': 'Invalid credentials'}, 401 + print(f"[DEBUG] [api_gateway] [login] Valid credentials for user: {user.get('id')}") + print("[DEBUG] [api_gateway] [login] Generating JWT token") token = jwt.encode({ 'id': user['id'], 'username': user['username'], 'exp': datetime.datetime.utcnow() + datetime.timedelta(hours=1) }, app.config['SECRET_KEY'], algorithm='HS256') + print(f"[DEBUG] [api_gateway] [login] Token generated: {token[:10]}...") user_data = {k: user[k] for k in user if k != 'password'} + print("[DEBUG] [api_gateway] [login] Login successful") return {'token': token, 'user': user_data} @user_ns.route('/profile') @@ -251,15 +304,20 @@ class UserProfile(Resource): @user_ns.marshal_with(user_profile_model) def get(self): """Get user profile information""" + print("[DEBUG] [api_gateway] [user_profile] Called") auth_header = request.headers.get('Authorization') token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [user_profile] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256']) + print(f"[DEBUG] [api_gateway] [user_profile] Looking up user with ID: {payload.get('id')}") users = load_users() user = next((u for u in users if u.get('id') == payload.get('id')), None) if not user: + print(f"[DEBUG] [api_gateway] [user_profile] User not found with ID: {payload.get('id')}") return {'error': 'User not found'}, 404 + print(f"[DEBUG] [api_gateway] [user_profile] Found user: {user.get('username')}") return {k: user[k] for k in user if k != 'password'}, 200 @bookmark_ns.route('/') @@ -268,14 +326,18 @@ class Bookmark(Resource): def get(self): """Get all bookmarked articles for the authenticated user""" try: + print("[DEBUG] [api_gateway] [get_bookmarks] Called") # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [get_bookmarks] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'],audience='authenticated') user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [get_bookmarks] Getting bookmarks for user: {user_id}") # Get bookmarks using the news_storage service bookmarks = get_user_bookmarks(user_id) + print(f"[DEBUG] [api_gateway] [get_bookmarks] Found {len(bookmarks)} bookmarks") return { 'status': 'success', @@ -283,6 +345,7 @@ def get(self): }, 200 except Exception as e: + print(f"[DEBUG] [api_gateway] [get_bookmarks] Error: {str(e)}") logger.error(f"Error fetching bookmarks: {str(e)}") return { 'status': 'error', @@ -293,22 +356,28 @@ def get(self): def post(self): """Add a bookmark for a news article""" try: + print("[DEBUG] [api_gateway] [add_bookmark] Called") # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [add_bookmark] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'],audience='authenticated') user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [add_bookmark] Adding bookmark for user: {user_id}") # Get the news article ID from the request body data = request.get_json() news_id = data.get('news_id') + print(f"[DEBUG] [api_gateway] [add_bookmark] News article ID: {news_id}") if not news_id: + print("[DEBUG] [api_gateway] [add_bookmark] News article ID missing in request") return {'error': 'News article ID is required'}, 400 # Add the bookmark using the news_storage service - # bookmark = add_bookmark(user_id, '054c021a-f6f3-44b2-a43f-1ca0d211eb15') + print(f"[DEBUG] [api_gateway] [add_bookmark] Adding bookmark for user {user_id}, article {news_id}") bookmark = add_bookmark(user_id, news_id) + print(f"[DEBUG] [api_gateway] [add_bookmark] Bookmark added with ID: {bookmark['id'] if isinstance(bookmark, dict) else bookmark}") return { 'status': 'success', @@ -319,6 +388,7 @@ def post(self): }, 201 except Exception as e: + print(f"[DEBUG] [api_gateway] [add_bookmark] Error: {str(e)}") logger.error(f"Error adding bookmark: {str(e)}") return { 'status': 'error', @@ -331,14 +401,18 @@ class BookmarkDelete(Resource): def delete(self, bookmark_id): """Remove a bookmark for a news article""" try: + print(f"[DEBUG] [api_gateway] [delete_bookmark] Called for bookmark: {bookmark_id}") # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [delete_bookmark] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'],audience='authenticated') user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [delete_bookmark] Deleting bookmark {bookmark_id} for user {user_id}") # Delete the bookmark using the news_storage service result = delete_bookmark(user_id, bookmark_id) + print(f"[DEBUG] [api_gateway] [delete_bookmark] Deletion result: {result}") return { 'status': 'success', @@ -346,6 +420,7 @@ def delete(self, bookmark_id): }, 200 except Exception as e: + print(f"[DEBUG] [api_gateway] [delete_bookmark] Error: {str(e)}") logger.error(f"Error removing bookmark: {str(e)}") return { 'status': 'error', @@ -354,46 +429,220 @@ def delete(self, bookmark_id): +# Import story tracking service +from backend.microservices.story_tracking_service import ( + create_tracked_story, get_tracked_stories, get_story_details, + delete_tracked_story, find_related_articles, update_all_tracked_stories +) +print("[DEBUG] [api_gateway] [startup] Story tracking service modules imported") + @story_tracking_ns.route('/') class StoryTracking(Resource): @story_tracking_ns.param('keyword', 'Keyword to track for news updates') def get(self): """Fetch latest news for a tracked keyword""" try: + print("[DEBUG] [api_gateway] [story_tracking] Story tracking get endpoint called") keyword = request.args.get('keyword') + print(f"[DEBUG] [api_gateway] [story_tracking] Requested keyword: '{keyword}'") if not keyword: + print("[DEBUG] [api_gateway] [story_tracking] Keyword parameter missing") return make_response(jsonify({ 'status': 'error', 'message': 'Keyword parameter is required' }), 400) - # Fetch latest articles for the keyword + print(f"[DEBUG] [api_gateway] [story_tracking] Fetching news for keyword: '{keyword}'") articles = fetch_news(keyword) + print(f"[DEBUG] [api_gateway] [story_tracking] Found {len(articles) if articles else 0} articles") - # Store articles and prepare response processed_articles = [] for article in articles: + print(f"[DEBUG] [api_gateway] [story_tracking] Processing article: {article.get('title', 'No title')}") article_id = store_article_in_supabase(article) + print(f"[DEBUG] [api_gateway] [story_tracking] Stored article with ID: {article_id}") processed_articles.append({ 'id': article_id, 'title': article.get('title'), 'url': article.get('url'), - 'source': article.get('source'), + 'source': article.get('source', {}).get('name') if isinstance(article.get('source'), dict) else article.get('source'), 'publishedAt': article.get('publishedAt', datetime.now().isoformat()) }) + print(f"[DEBUG] [api_gateway] [story_tracking] Returning {len(processed_articles)} processed articles") return make_response(jsonify({ 'status': 'success', 'articles': processed_articles }), 200) except Exception as e: + print(f"[DEBUG] [api_gateway] [story_tracking] Error: {str(e)}") logger.error(f"Error in story tracking: {str(e)}") return make_response(jsonify({ 'status': 'error', 'message': str(e) }), 500) + + @token_required + def post(self): + """Create a new tracked story""" + try: + print("[DEBUG] [api_gateway] [story_tracking] Called") + # Get the user ID from the token + auth_header = request.headers.get('Authorization') + token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [story_tracking] Decoding token: {token[:10]}...") + payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'], audience='authenticated') + user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [story_tracking] Creating tracked story for user: {user_id}") + + # Get request data + data = request.get_json() + keyword = data.get('keyword') + source_article_id = data.get('sourceArticleId') + print(f"[DEBUG] [api_gateway] [story_tracking] Story details - Keyword: '{keyword}', Source article: {source_article_id}") + + if not keyword: + print("[DEBUG] [api_gateway] [story_tracking] Keyword parameter missing in request") + return make_response(jsonify({ + 'status': 'error', + 'message': 'Keyword is required' + }), 400) + + print(f"[DEBUG] [api_gateway] [story_tracking] Calling create_tracked_story with user_id: {user_id}, keyword: '{keyword}'") + tracked_story = create_tracked_story(user_id, keyword, source_article_id) + print(f"[DEBUG] [api_gateway] [story_tracking] Tracked story created with ID: {tracked_story['id'] if tracked_story else 'unknown'}") + + print(f"[DEBUG] [api_gateway] [story_tracking] Getting full story details for story: {tracked_story['id']}") + story_with_articles = get_story_details(tracked_story['id']) + print(f"[DEBUG] [api_gateway] [story_tracking] Found {len(story_with_articles.get('articles', [])) if story_with_articles else 0} related articles") + + return make_response(jsonify({ + 'status': 'success', + 'data': story_with_articles + }), 201) + + except Exception as e: + print(f"[DEBUG] [api_gateway] [story_tracking] Error: {str(e)}") + logger.error(f"Error creating tracked story: {str(e)}") + return make_response(jsonify({ + 'status': 'error', + 'message': str(e) + }), 500) + +@story_tracking_ns.route('/user') +class UserStoryTracking(Resource): + @token_required + def get(self): + """Get all tracked stories for the authenticated user""" + try: + print("[DEBUG] [api_gateway] [user_story_tracking] Called") + # Get the user ID from the token + auth_header = request.headers.get('Authorization') + token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [user_story_tracking] Decoding token: {token[:10]}...") + payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'], audience='authenticated') + user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [user_story_tracking] Getting tracked stories for user: {user_id}") + + print(f"[DEBUG] [api_gateway] [user_story_tracking] Calling get_tracked_stories") + tracked_stories = get_tracked_stories(user_id) + print(f"[DEBUG] [api_gateway] [user_story_tracking] Found {len(tracked_stories)} tracked stories") + + return make_response(jsonify({ + 'status': 'success', + 'data': tracked_stories + }), 200) + + except Exception as e: + print(f"[DEBUG] [api_gateway] [user_story_tracking] Error: {str(e)}") + logger.error(f"Error getting tracked stories: {str(e)}") + return make_response(jsonify({ + 'status': 'error', + 'message': str(e) + }), 500) + +@story_tracking_ns.route('/') +class StoryTrackingDetail(Resource): + @token_required + def get(self, story_id): + """Get details for a specific tracked story""" + try: + print(f"[DEBUG] [api_gateway] [story_tracking_detail] Called for story: {story_id}") + print(f"[DEBUG] [api_gateway] [story_tracking_detail] Calling get_story_details for story: {story_id}") + story = get_story_details(story_id) + + if not story: + print(f"[DEBUG] [api_gateway] [story_tracking_detail] No story found with ID: {story_id}") + return make_response(jsonify({ + 'status': 'error', + 'message': 'Tracked story not found' + }), 404) + + print(f"[DEBUG] [api_gateway] [story_tracking_detail] Found story: {story['keyword']}") + print(f"[DEBUG] [api_gateway] [story_tracking_detail] Story has {len(story.get('articles', []))} articles") + return make_response(jsonify({ + 'status': 'success', + 'data': story + }), 200) + + except Exception as e: + print(f"[DEBUG] [api_gateway] [story_tracking_detail] Error: {str(e)}") + logger.error(f"Error getting story details: {str(e)}") + return make_response(jsonify({ + 'status': 'error', + 'message': str(e) + }), 500) + + @token_required + def delete(self, story_id): + """Stop tracking a story""" + try: + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Called for story: {story_id}") + # Get the user ID from the token + auth_header = request.headers.get('Authorization') + token = auth_header.split()[1] + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Decoding token: {token[:10]}...") + payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'], audience='authenticated') + user_id = payload.get('sub') + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Deleting tracked story {story_id} for user {user_id}") + + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Calling delete_tracked_story") + success = delete_tracked_story(user_id, story_id) + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Delete result: {success}") + + if not success: + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Failed to delete story or story not found") + return make_response(jsonify({ + 'status': 'error', + 'message': 'Failed to delete tracked story or story not found' + }), 404) + + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Story deleted successfully") + return make_response(jsonify({ + 'status': 'success', + 'message': 'Tracked story deleted successfully' + }), 200) + + except Exception as e: + print(f"[DEBUG] [api_gateway] [delete_story_tracking] Error: {str(e)}") + logger.error(f"Error deleting tracked story: {str(e)}") + return make_response(jsonify({ + 'status': 'error', + 'message': str(e) + }), 500) + +@app.route('/api/story_tracking', methods=['OPTIONS']) +def story_tracking_options(): + print("[DEBUG] [api_gateway] [story_tracking_options] Called") + response = make_response() + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") + response.headers.add("Access-Control-Allow-Methods", "GET,POST,PUT,DELETE,OPTIONS") + print("[DEBUG] [api_gateway] [story_tracking_options] Responding with CORS headers") + return response if __name__ == '__main__': port = int(sys.argv[1]) if len(sys.argv) > 1 else Config.API_PORT + print(f"[DEBUG] [api_gateway] [main] Starting on {Config.API_HOST}:{port} with debug={True}") app.run(host=Config.API_HOST, port=port, debug=True) diff --git a/backend/data/story_tracking_schema.sql b/backend/data/story_tracking_schema.sql new file mode 100644 index 0000000..60e3636 --- /dev/null +++ b/backend/data/story_tracking_schema.sql @@ -0,0 +1,70 @@ +-- story_tracking_schema.sql +-- Schema for story tracking feature + +-- Table for tracked stories +CREATE TABLE tracked_stories ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + user_id UUID NOT NULL REFERENCES auth.users(id), + keyword VARCHAR(255) NOT NULL, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + last_updated TIMESTAMP NOT NULL DEFAULT NOW() +); + +-- Table for articles related to tracked stories +CREATE TABLE tracked_story_articles ( + tracked_story_id UUID REFERENCES tracked_stories(id) ON DELETE CASCADE, + news_id UUID REFERENCES news_articles(id) ON DELETE CASCADE, + added_at TIMESTAMP NOT NULL DEFAULT NOW(), + PRIMARY KEY (tracked_story_id, news_id) +); + +-- Index for faster lookups +CREATE INDEX idx_tracked_stories_user_id ON tracked_stories(user_id); +CREATE INDEX idx_tracked_stories_keyword ON tracked_stories(keyword); +CREATE INDEX idx_tracked_story_articles_story_id ON tracked_story_articles(tracked_story_id); + +-- RLS Policies for tracked_stories +ALTER TABLE tracked_stories ENABLE ROW LEVEL SECURITY; + +-- Allow users to view only their own tracked stories +CREATE POLICY tracked_stories_select_policy ON tracked_stories + FOR SELECT USING (auth.uid() = user_id); + +-- Allow users to insert their own tracked stories +CREATE POLICY tracked_stories_insert_policy ON tracked_stories + FOR INSERT WITH CHECK (auth.uid() = user_id); + +-- Allow users to update only their own tracked stories +CREATE POLICY tracked_stories_update_policy ON tracked_stories + FOR UPDATE USING (auth.uid() = user_id); + +-- Allow users to delete only their own tracked stories +CREATE POLICY tracked_stories_delete_policy ON tracked_stories + FOR DELETE USING (auth.uid() = user_id); + +-- RLS Policies for tracked_story_articles +ALTER TABLE tracked_story_articles ENABLE ROW LEVEL SECURITY; + +-- Allow users to view only articles related to their tracked stories +CREATE POLICY tracked_story_articles_select_policy ON tracked_story_articles + FOR SELECT USING ( + tracked_story_id IN ( + SELECT id FROM tracked_stories WHERE user_id = auth.uid() + ) + ); + +-- Allow users to insert only articles related to their tracked stories +CREATE POLICY tracked_story_articles_insert_policy ON tracked_story_articles + FOR INSERT WITH CHECK ( + tracked_story_id IN ( + SELECT id FROM tracked_stories WHERE user_id = auth.uid() + ) + ); + +-- Allow users to delete only articles related to their tracked stories +CREATE POLICY tracked_story_articles_delete_policy ON tracked_story_articles + FOR DELETE USING ( + tracked_story_id IN ( + SELECT id FROM tracked_stories WHERE user_id = auth.uid() + ) + ); \ No newline at end of file diff --git a/backend/microservices/story_tracking_service.py b/backend/microservices/story_tracking_service.py index 51f1911..caafb48 100755 --- a/backend/microservices/story_tracking_service.py +++ b/backend/microservices/story_tracking_service.py @@ -1,18 +1,381 @@ #!/usr/bin/env python3 """ story_tracking_service.py - Microservice for Story Tracking -Wraps the story clustering logic and provides API endpoints for tracking stories. +Provides functionality for tracking news stories by keyword and finding related articles. """ +import os +import datetime +from supabase import create_client, Client +from dotenv import load_dotenv from summarization.story_tracking.story_tracking import cluster_articles +from backend.microservices.news_fetcher import fetch_news + +print("[DEBUG] [story_tracking_service] [main] Story tracking service starting...") + +# Load environment variables +load_dotenv() +print("[DEBUG] [story_tracking_service] [main] Environment variables loaded") + +# Initialize Supabase client with service role key for admin access to bypass RLS +SUPABASE_URL = os.getenv("VITE_SUPABASE_URL") +# Use service role key instead of anon key to bypass RLS policies +SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY") + +print(f"[DEBUG] [story_tracking_service] [main] Supabase URL: {SUPABASE_URL}") +print(f"[DEBUG] [story_tracking_service] [main] Supabase Key: {SUPABASE_SERVICE_KEY[:5]}..." if SUPABASE_SERVICE_KEY else "[DEBUG] [story_tracking_service] [main] Supabase Key: None") + +supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY) + +print("[DEBUG] [story_tracking_service] [main] Supabase client initialized") def run_story_tracking(article_embeddings): + print(f"[DEBUG] [story_tracking_service] [run_story_tracking] Running story tracking with {len(article_embeddings) if article_embeddings else 0} embeddings") labels = cluster_articles(article_embeddings) + print(f"[DEBUG] [story_tracking_service] [run_story_tracking] Clustering complete, found {len(labels) if labels else 0} labels") return labels +def create_tracked_story(user_id, keyword, source_article_id=None): + """ + Creates a new tracked story for a user based on a keyword. + + Args: + user_id: The ID of the user tracking the story + keyword: The keyword/topic to track + source_article_id: Optional ID of the source article that initiated tracking + + Returns: + The created tracked story record + """ + + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Creating tracked story for user {user_id}, keyword: '{keyword}', source_article: {source_article_id}") + try: + # Check if the user is already tracking this keyword + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Checking if user already tracks keyword '{keyword}'") + existing = supabase.table("tracked_stories") \ + .select("*") \ + .eq("user_id", user_id) \ + .eq("keyword", keyword) \ + .execute() + + if existing.data and len(existing.data) > 0: + # User is already tracking this keyword + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] User already tracking this keyword, found {len(existing.data)} existing entries") + return existing.data[0] + + # Create a new tracked story + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Creating new tracked story record") + current_time = datetime.datetime.utcnow().isoformat() + result = supabase.table("tracked_stories").insert({ + "user_id": user_id, + "keyword": keyword, + "created_at": current_time, + "last_updated": current_time + }).execute() + + if not result.data: + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Failed to create tracked story: {result}") + return None + + tracked_story = result.data[0] if result.data else None + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Tracked story created with ID: {tracked_story['id'] if tracked_story else None}") + + # If a source article was provided, link it to the tracked story + if tracked_story and source_article_id: + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Linking source article {source_article_id} to tracked story") + supabase.table("tracked_story_articles").insert({ + "tracked_story_id": tracked_story["id"], + "news_id": source_article_id, + "added_at": datetime.datetime.utcnow().isoformat() + }).execute() + + # Immediately find and add related articles + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Finding related articles for new tracked story") + find_related_articles(tracked_story["id"], keyword) + + return tracked_story + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Error creating tracked story: {str(e)}") + raise e + +def get_tracked_stories(user_id): + """ + Gets all tracked stories for a user. + + Args: + user_id: The ID of the user + + Returns: + List of tracked stories with their related articles + """ + print(f"[DEBUG] [story_tracking_service] [get_tracked_stories] Getting tracked stories for user {user_id}") + try: + # Get all tracked stories for the user + result = supabase.table("tracked_stories") \ + .select("*") \ + .eq("user_id", user_id) \ + .order("created_at", desc=True) \ + .execute() + + tracked_stories = result.data if result.data else [] + print(f"[DEBUG] [story_tracking_service] [get_tracked_stories] Found {len(tracked_stories)} tracked stories") + + # For each tracked story, get its related articles + for story in tracked_stories: + print(f"[DEBUG] [story_tracking_service] [get_tracked_stories] Getting articles for story {story['id']}") + story["articles"] = get_story_articles(story["id"]) + print(f"[DEBUG] [story_tracking_service] [get_tracked_stories] Found {len(story['articles'])} articles for story {story['id']}") + + return tracked_stories + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [get_tracked_stories] Error getting tracked stories: {str(e)}") + raise e + +def get_story_details(story_id): + """ + Gets details for a specific tracked story including related articles. + + Args: + story_id: The ID of the tracked story + + Returns: + The tracked story with its related articles + """ + print(f"[DEBUG] [story_tracking_service] [get_story_details] Getting story details for story ID {story_id}") + try: + # Get the tracked story + result = supabase.table("tracked_stories") \ + .select("*") \ + .eq("id", story_id) \ + .execute() + + if not result.data or len(result.data) == 0: + print(f"[DEBUG] [story_tracking_service] [get_story_details] No story found with ID {story_id}") + return None + + story = result.data[0] + print(f"[DEBUG] [story_tracking_service] [get_story_details] Found story: {story['keyword']}") + + # Get related articles + print(f"[DEBUG] [story_tracking_service] [get_story_details] Getting related articles") + story["articles"] = get_story_articles(story_id) + print(f"[DEBUG] [story_tracking_service] [get_story_details] Found {len(story['articles'])} related articles") + + return story + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [get_story_details] Error getting story details: {str(e)}") + raise e + +def delete_tracked_story(user_id, story_id): + """ + Deletes a tracked story for a user. + + Args: + user_id: The ID of the user + story_id: The ID of the tracked story to delete + + Returns: + True if successful, False otherwise + """ + print(f"[DEBUG] [story_tracking_service] [delete_tracked_story] Deleting tracked story {story_id} for user {user_id}") + try: + # Delete the tracked story (related articles will be deleted via CASCADE) + result = supabase.table("tracked_stories") \ + .delete() \ + .eq("id", story_id) \ + .eq("user_id", user_id) \ + .execute() + + success = len(result.data) > 0 + print(f"[DEBUG] [story_tracking_service] [delete_tracked_story] Delete operation {'successful' if success else 'failed'}") + return success + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [delete_tracked_story] Error deleting tracked story: {str(e)}") + raise e + +def get_story_articles(story_id): + """ + Gets all articles related to a tracked story. + + Args: + story_id: The ID of the tracked story + + Returns: + List of articles related to the tracked story + """ + print(f"[DEBUG] [story_tracking_service] [get_story_articles] Getting articles for story {story_id}") + try: + # Get all article IDs related to the tracked story + result = supabase.table("tracked_story_articles") \ + .select("news_id, added_at") \ + .eq("tracked_story_id", story_id) \ + .order("added_at", desc=True) \ + .execute() + + article_refs = result.data if result.data else [] + print(f"[DEBUG] [story_tracking_service] [get_story_articles] Found {len(article_refs)} article references") + + if not article_refs: + return [] + + # Get the full article details for each article ID + articles = [] + for ref in article_refs: + print(f"[DEBUG] [story_tracking_service] [get_story_articles] Getting details for article {ref['news_id']}") + article_result = supabase.table("news_articles") \ + .select("*") \ + .eq("id", ref["news_id"]) \ + .execute() + + if article_result.data and len(article_result.data) > 0: + article = article_result.data[0] + # Add the added_at timestamp from the join table + article["added_at"] = ref["added_at"] + articles.append(article) + print(f"[DEBUG] [story_tracking_service] [get_story_articles] Added article: {article.get('title', 'No title')}") + else: + print(f"[DEBUG] [story_tracking_service] [get_story_articles] No data found for article {ref['news_id']}") + + return articles + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [get_story_articles] Error getting story articles: {str(e)}") + raise e + +def find_related_articles(story_id, keyword): + """ + Finds and adds articles related to a tracked story based on its keyword. + + Args: + story_id: The ID of the tracked story + keyword: The keyword to search for + + Returns: + Number of new articles added + """ + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Finding related articles for story {story_id}, keyword: '{keyword}'") + try: + # Get the tracked story to check when it was last updated + story_result = supabase.table("tracked_stories") \ + .select("*") \ + .eq("id", story_id) \ + .execute() + + if not story_result.data or len(story_result.data) == 0: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] No story found with ID {story_id}") + return 0 + + story = story_result.data[0] + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Found story: {story['keyword']}") + + # Fetch articles related to the keyword + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Fetching articles for keyword '{keyword}'") + articles = fetch_news(keyword) + + if not articles: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] No articles found for keyword '{keyword}'") + return 0 + + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Found {len(articles)} articles for keyword '{keyword}'") + + # Get existing article IDs for this story to avoid duplicates + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Getting existing article IDs for story {story_id}") + existing_result = supabase.table("tracked_story_articles") \ + .select("news_id") \ + .eq("tracked_story_id", story_id) \ + .execute() + + existing_ids = [item["news_id"] for item in existing_result.data] if existing_result.data else [] + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Found {len(existing_ids)} existing article IDs") + + # Process and add new articles + new_articles_count = 0 + for article in articles: + # First, store the article in the news_articles table + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Storing article: {article.get('title', 'No title')}") + from backend.microservices.news_storage import store_article_in_supabase + article_id = store_article_in_supabase(article) + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Article stored with ID: {article_id}") + + # If this article is not already linked to the story, add it + if article_id not in existing_ids: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Linking new article {article_id} to story {story_id}") + supabase.table("tracked_story_articles").insert({ + "tracked_story_id": story_id, + "news_id": article_id, + "added_at": datetime.datetime.utcnow().isoformat() + }).execute() + new_articles_count += 1 + else: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Article {article_id} already linked to story") + + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Added {new_articles_count} new articles to story {story_id}") + + # Update the last_updated timestamp of the tracked story + if new_articles_count > 0: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Updating last_updated timestamp for story {story_id}") + supabase.table("tracked_stories") \ + .update({"last_updated": datetime.datetime.utcnow().isoformat()}) \ + .eq("id", story_id) \ + .execute() + + return new_articles_count + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [find_related_articles] Error finding related articles: {str(e)}") + raise e + +def update_all_tracked_stories(): + """ + Background job to update all tracked stories with new related articles. + + Returns: + Dictionary with counts of stories updated and new articles found + """ + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Starting update of all tracked stories") + try: + # Get all tracked stories + result = supabase.table("tracked_stories") \ + .select("id, keyword") \ + .execute() + + tracked_stories = result.data if result.data else [] + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Found {len(tracked_stories)} tracked stories to update") + + if not tracked_stories: + return {"stories_updated": 0, "new_articles": 0} + + # Update each tracked story + stories_updated = 0 + total_new_articles = 0 + + for story in tracked_stories: + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Updating story {story['id']}, keyword: '{story['keyword']}'") + new_articles = find_related_articles(story["id"], story["keyword"]) + if new_articles > 0: + stories_updated += 1 + total_new_articles += new_articles + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Added {new_articles} new articles to story {story['id']}") + else: + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] No new articles found for story {story['id']}") + + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Update complete. Updated {stories_updated} stories with {total_new_articles} new articles") + return { + "stories_updated": stories_updated, + "new_articles": total_new_articles + } + + except Exception as e: + print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Error updating tracked stories: {str(e)}") + raise e + if __name__ == '__main__': - # Example dummy embeddings: - import numpy as np - dummy_embeddings = [np.random.rand(10) for _ in range(10)] - print("Story Tracking Service Output:") - print(run_story_tracking(dummy_embeddings)) + # Example usage + print("[DEBUG] [story_tracking_service] [main] Running story_tracking_service.py as main") + result = update_all_tracked_stories() + print(f"[DEBUG] [story_tracking_service] [main] Updated {result['stories_updated']} stories with {result['new_articles']} new articles") + print("[DEBUG] [story_tracking_service] [main] Execution completed") From f2508ebae9c1fb3e96b00ae20e82fd16e7853aa9 Mon Sep 17 00:00:00 2001 From: Akalpit Dawkhar Date: Sun, 2 Mar 2025 13:27:18 -0500 Subject: [PATCH 03/12] Added Documentation --- backend/api_gateway/api_gateway.py | 288 ++++++++++++++---- .../core/__pycache__/config.cpython-312.pyc | Bin 2321 -> 2354 bytes .../summarization_service.cpython-312.pyc | Bin 6234 -> 6524 bytes backend/microservices/auth_service.py | 42 ++- backend/microservices/news_fetcher.py | 74 ++++- backend/microservices/news_storage.py | 135 +++++++- backend/microservices/nope.env | 17 -- .../microservices/story_tracking_service.py | 52 +++- .../microservices/summarization_service.py | 73 ++++- 9 files changed, 578 insertions(+), 103 deletions(-) delete mode 100644 backend/microservices/nope.env diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index c07ee27..f4625b9 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -1,8 +1,30 @@ + #!/usr/bin/env python3 -"""api_gateway.py - API Gateway for the News Aggregator Backend -This Flask application aggregates endpoints from various microservices. +"""API Gateway for the News Aggregator Backend + +This module serves as the central API Gateway for the News Aggregator application. +It provides a unified interface for clients to interact with various microservices +including news fetching, summarization, authentication, and story tracking. + +Key Features: +- RESTful API endpoints using Flask-RestX +- JWT-based authentication +- CORS support for cross-origin requests +- Swagger documentation +- Error handling and logging +- Integration with multiple microservices + +Endpoints: +- /api/news: News fetching and processing +- /health: System health check +- /summarize: Article summarization +- /api/user: User profile management +- /api/auth: Authentication operations +- /api/bookmarks: Bookmark management +- /api/story_tracking: Story tracking functionality """ +# Standard library imports from flask import Blueprint, Flask, jsonify, request, make_response from flask_cors import CORS from flask_restx import Api, Resource, fields, Namespace @@ -14,15 +36,18 @@ import datetime from datetime import datetime, timedelta from functools import wraps + +# Add project root to Python path for relative imports sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) print("[DEBUG] [api_gateway] [startup] API Gateway starting up...") -# load env +# Load environment variables from .env file from dotenv import load_dotenv load_dotenv() print("[DEBUG] [api_gateway] [startup] Environment variables loaded") +# Import microservices and utilities from backend.microservices.summarization_service import run_summarization, process_articles from backend.microservices.news_fetcher import fetch_news from backend.core.config import Config @@ -30,16 +55,16 @@ from backend.microservices.auth_service import load_users from backend.microservices.news_storage import store_article_in_supabase, log_user_search, add_bookmark, get_user_bookmarks, delete_bookmark -# Initialize logger +# Initialize logger for the API Gateway logger = setup_logger(__name__) print("[DEBUG] [api_gateway] [startup] Logger initialized") -# Initialize Flask app with CORS support +# Initialize Flask application with security configurations app = Flask(__name__) -app.config['SECRET_KEY'] = os.getenv('JWT_SECRET_KEY', 'your-secret-key') # Change this in production +app.config['SECRET_KEY'] = os.getenv('JWT_SECRET_KEY', 'your-secret-key') # JWT secret key for token signing print("[DEBUG] [api_gateway] [startup] Flask app initialized with secret key") -# Improved CORS configuration to handle preflight requests properly +# Configure CORS to allow specific origins and methods CORS(app, origins=["http://localhost:5173", "http://localhost:5001"], supports_credentials=True, @@ -47,12 +72,12 @@ methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"]) print("[DEBUG] [api_gateway] [startup] CORS configured") -# Initialize Flask-RestX +# Initialize Flask-RestX for API documentation api = Api(app, version='1.0', title='News Aggregator API', description='A news aggregation and summarization API') print("[DEBUG] [api_gateway] [startup] Flask-RestX API initialized") -# Define namespaces +# Define API namespaces for logical grouping of endpoints news_ns = api.namespace('api/news', description='News operations') health_ns = api.namespace('health', description='Health check operations') summarize_ns = api.namespace('summarize', description='Text summarization operations') @@ -63,6 +88,20 @@ print("[DEBUG] [api_gateway] [startup] API namespaces defined") def token_required(f): + """Decorator to protect routes that require authentication. + + This decorator validates the JWT token in the Authorization header. + It ensures that only authenticated users can access protected endpoints. + + Args: + f: The function to be decorated. + + Returns: + decorated: The decorated function that includes token validation. + + Raises: + 401: If the token is missing or invalid. + """ @wraps(f) def decorated(*args, **kwargs): print("[DEBUG] [api_gateway] [token_required] Checking token in request") @@ -71,7 +110,7 @@ def decorated(*args, **kwargs): print("[DEBUG] [api_gateway] [token_required] Authorization header missing") return {'error': 'Authorization header missing'}, 401 try: - token = auth_header.split()[1] + token = auth_header.split()[1] # Extract token from 'Bearer ' print(f"[DEBUG] [api_gateway] [token_required] Decoding token: {token[:10]}...") payload = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'],audience='authenticated') print(f"[DEBUG] [api_gateway] [token_required] Token decoded successfully, user: {payload.get('sub', 'unknown')}") @@ -82,7 +121,7 @@ def decorated(*args, **kwargs): return {'error': 'Invalid token', 'message': str(e)}, 401 return decorated -# Define models for documentation +# Define API models for request/response documentation article_model = api.model('Article', { 'article_text': fields.String(required=True, description='The text to summarize') }) @@ -96,7 +135,7 @@ def decorated(*args, **kwargs): 'avatarUrl': fields.String(description='URL to user avatar') }) -# Model for user signup +# Model for user registration signup_model = api.model('Signup', { 'username': fields.String(required=True, description='Username'), 'password': fields.String(required=True, description='Password'), @@ -107,20 +146,33 @@ def decorated(*args, **kwargs): print("[DEBUG] [api_gateway] [startup] API models defined") -# Health check endpoint +# Health check endpoint for system monitoring @health_ns.route('/') class HealthCheck(Resource): def get(self): - """Check if API Gateway is healthy""" + """Check the health status of the API Gateway. + + Returns: + dict: A dictionary containing the health status. + int: HTTP 200 status code indicating success. + """ print("[DEBUG] [api_gateway] [health_check] Called") return {"status": "API Gateway is healthy"}, 200 -# Summarization endpoint +# Endpoint for article summarization @summarize_ns.route('/') class Summarize(Resource): @summarize_ns.expect(article_model) def post(self): - """Summarize the given article text""" + """Summarize the provided article text. + + Expects a JSON payload with 'article_text' field. + Uses the summarization service to generate a concise summary. + + Returns: + dict: Contains the generated summary. + int: HTTP 200 status code on success. + """ print("[DEBUG] [api_gateway] [summarize] Called") data = request.get_json() article_text = data.get('article_text', '') @@ -135,8 +187,19 @@ class NewsFetch(Resource): @news_ns.param('user_id', 'User ID for logging search history') @news_ns.param('session_id', 'Session ID for tracking requests') def get(self): - """ - Fetch news articles, store them in Supabase, and log user search history if a user ID is provided. + """Fetch news articles based on a keyword and store them in Supabase. + + This endpoint fetches news articles matching the provided keyword, + stores them in Supabase, and logs the search history if a user ID is provided. + + Args: + keyword (str): The search term for fetching news articles. + user_id (str, optional): User ID for logging search history. + session_id (str): Session ID for tracking the request. + + Returns: + dict: Contains the stored article IDs and success status. + int: HTTP 200 on success, 500 on error. """ try: keyword = request.args.get('keyword', '') @@ -173,12 +236,22 @@ def get(self): }), 500) -# News processing endpoint @news_ns.route('/process') class NewsProcess(Resource): @news_ns.param('session_id', 'Session ID for tracking requests') def post(self): - """Process and summarize articles""" + """Process and summarize a batch of articles. + + This endpoint processes articles associated with the provided session ID, + generating summaries and performing any necessary data transformations. + + Args: + session_id (str): Session ID for tracking the request and identifying articles. + + Returns: + dict: Contains processed articles data and success status. + int: HTTP 200 on success, 500 on error. + """ try: session_id = request.args.get('session_id') print(f"[DEBUG] [api_gateway] [news_process] Called with session_id: {session_id}") @@ -199,12 +272,28 @@ def post(self): 'message': str(e) }, 500 -# User authentication endpoints @auth_ns.route('/signup') class Signup(Resource): @auth_ns.expect(signup_model) def post(self): - """Register a new user""" + """Register a new user in the system. + + Creates a new user account with the provided information and generates + a JWT token for immediate authentication. + + Expected JSON payload: + { + 'username': str (required), + 'password': str (required), + 'email': str (required), + 'firstName': str (optional), + 'lastName': str (optional) + } + + Returns: + dict: Contains user data (excluding password) and JWT token. + int: HTTP 201 on success, 400 on validation error, 500 on server error. + """ print("[DEBUG] [api_gateway] [signup] User signup endpoint called") data = request.get_json() username = data.get('username') @@ -266,7 +355,20 @@ def post(self): @auth_ns.route('/login') class Login(Resource): def post(self): - """Login and get authentication token""" + """Authenticate user and generate JWT token. + + Validates user credentials and generates a JWT token for authenticated access. + + Expected JSON payload: + { + 'username': str (required), + 'password': str (required) + } + + Returns: + dict: Contains user data (excluding password) and JWT token. + int: HTTP 200 on success, 400 on validation error, 401 on invalid credentials. + """ print("[DEBUG] [api_gateway] [login] Login endpoint called") data = request.get_json() username = data.get('username') @@ -303,7 +405,15 @@ class UserProfile(Resource): @token_required @user_ns.marshal_with(user_profile_model) def get(self): - """Get user profile information""" + """Retrieve authenticated user's profile information. + + Requires a valid JWT token in the Authorization header. + Returns the user's profile data excluding sensitive information. + + Returns: + dict: User profile data including id, username, email, and names. + int: HTTP 200 on success, 404 if user not found. + """ print("[DEBUG] [api_gateway] [user_profile] Called") auth_header = request.headers.get('Authorization') token = auth_header.split()[1] @@ -324,10 +434,17 @@ def get(self): class Bookmark(Resource): @token_required def get(self): - """Get all bookmarked articles for the authenticated user""" + """Retrieve all bookmarks for the authenticated user. + + Requires a valid JWT token in the Authorization header. + Returns a list of bookmarked articles for the current user. + + Returns: + dict: Contains list of bookmarked articles and success status. + int: HTTP 200 on success, 500 on error. + """ try: print("[DEBUG] [api_gateway] [get_bookmarks] Called") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [get_bookmarks] Decoding token: {token[:10]}...") @@ -335,7 +452,6 @@ def get(self): user_id = payload.get('sub') print(f"[DEBUG] [api_gateway] [get_bookmarks] Getting bookmarks for user: {user_id}") - # Get bookmarks using the news_storage service bookmarks = get_user_bookmarks(user_id) print(f"[DEBUG] [api_gateway] [get_bookmarks] Found {len(bookmarks)} bookmarks") @@ -354,10 +470,22 @@ def get(self): @token_required def post(self): - """Add a bookmark for a news article""" + """Add a new bookmark for the authenticated user. + + Requires a valid JWT token in the Authorization header. + Creates a bookmark linking the user to a specific news article. + + Expected JSON payload: + { + 'news_id': str (required) + } + + Returns: + dict: Contains bookmark ID and success status. + int: HTTP 201 on success, 400 on validation error, 500 on server error. + """ try: print("[DEBUG] [api_gateway] [add_bookmark] Called") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [add_bookmark] Decoding token: {token[:10]}...") @@ -365,7 +493,6 @@ def post(self): user_id = payload.get('sub') print(f"[DEBUG] [api_gateway] [add_bookmark] Adding bookmark for user: {user_id}") - # Get the news article ID from the request body data = request.get_json() news_id = data.get('news_id') print(f"[DEBUG] [api_gateway] [add_bookmark] News article ID: {news_id}") @@ -374,7 +501,6 @@ def post(self): print("[DEBUG] [api_gateway] [add_bookmark] News article ID missing in request") return {'error': 'News article ID is required'}, 400 - # Add the bookmark using the news_storage service print(f"[DEBUG] [api_gateway] [add_bookmark] Adding bookmark for user {user_id}, article {news_id}") bookmark = add_bookmark(user_id, news_id) print(f"[DEBUG] [api_gateway] [add_bookmark] Bookmark added with ID: {bookmark['id'] if isinstance(bookmark, dict) else bookmark}") @@ -399,10 +525,20 @@ def post(self): class BookmarkDelete(Resource): @token_required def delete(self, bookmark_id): - """Remove a bookmark for a news article""" + """Remove a bookmark for a news article. + + Requires a valid JWT token in the Authorization header. + Deletes the specified bookmark for the authenticated user. + + Args: + bookmark_id (str): The ID of the bookmark to be deleted. + + Returns: + dict: Contains success message. + int: HTTP 200 on success, 500 on error. + """ try: print(f"[DEBUG] [api_gateway] [delete_bookmark] Called for bookmark: {bookmark_id}") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [delete_bookmark] Decoding token: {token[:10]}...") @@ -410,7 +546,6 @@ def delete(self, bookmark_id): user_id = payload.get('sub') print(f"[DEBUG] [api_gateway] [delete_bookmark] Deleting bookmark {bookmark_id} for user {user_id}") - # Delete the bookmark using the news_storage service result = delete_bookmark(user_id, bookmark_id) print(f"[DEBUG] [api_gateway] [delete_bookmark] Deletion result: {result}") @@ -426,21 +561,22 @@ def delete(self, bookmark_id): 'status': 'error', 'message': str(e) }, 500 - - - -# Import story tracking service -from backend.microservices.story_tracking_service import ( - create_tracked_story, get_tracked_stories, get_story_details, - delete_tracked_story, find_related_articles, update_all_tracked_stories -) -print("[DEBUG] [api_gateway] [startup] Story tracking service modules imported") @story_tracking_ns.route('/') class StoryTracking(Resource): @story_tracking_ns.param('keyword', 'Keyword to track for news updates') def get(self): - """Fetch latest news for a tracked keyword""" + """Fetch latest news for a tracked keyword. + + Retrieves and processes the latest news articles for a given keyword. + + Args: + keyword (str): The keyword to search for news articles. + + Returns: + dict: Contains list of processed articles and success status. + int: HTTP 200 on success, 400 if keyword is missing, 500 on error. + """ try: print("[DEBUG] [api_gateway] [story_tracking] Story tracking get endpoint called") keyword = request.args.get('keyword') @@ -485,10 +621,23 @@ def get(self): @token_required def post(self): - """Create a new tracked story""" + """Create a new tracked story. + + Requires a valid JWT token in the Authorization header. + Creates a new tracked story for the authenticated user based on a keyword and source article. + + Expected JSON payload: + { + 'keyword': str (required), + 'sourceArticleId': str (optional) + } + + Returns: + dict: Contains created story details and success status. + int: HTTP 201 on success, 400 on validation error, 500 on server error. + """ try: print("[DEBUG] [api_gateway] [story_tracking] Called") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [story_tracking] Decoding token: {token[:10]}...") @@ -496,7 +645,6 @@ def post(self): user_id = payload.get('sub') print(f"[DEBUG] [api_gateway] [story_tracking] Creating tracked story for user: {user_id}") - # Get request data data = request.get_json() keyword = data.get('keyword') source_article_id = data.get('sourceArticleId') @@ -534,10 +682,17 @@ def post(self): class UserStoryTracking(Resource): @token_required def get(self): - """Get all tracked stories for the authenticated user""" + """Get all tracked stories for the authenticated user. + + Requires a valid JWT token in the Authorization header. + Retrieves all tracked stories associated with the authenticated user. + + Returns: + dict: Contains list of tracked stories and success status. + int: HTTP 200 on success, 500 on error. + """ try: print("[DEBUG] [api_gateway] [user_story_tracking] Called") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [user_story_tracking] Decoding token: {token[:10]}...") @@ -566,7 +721,18 @@ def get(self): class StoryTrackingDetail(Resource): @token_required def get(self, story_id): - """Get details for a specific tracked story""" + """Get details for a specific tracked story. + + Requires a valid JWT token in the Authorization header. + Retrieves detailed information about a specific tracked story. + + Args: + story_id (str): The ID of the tracked story to retrieve. + + Returns: + dict: Contains story details and success status. + int: HTTP 200 on success, 404 if story not found, 500 on error. + """ try: print(f"[DEBUG] [api_gateway] [story_tracking_detail] Called for story: {story_id}") print(f"[DEBUG] [api_gateway] [story_tracking_detail] Calling get_story_details for story: {story_id}") @@ -596,10 +762,20 @@ def get(self, story_id): @token_required def delete(self, story_id): - """Stop tracking a story""" + """Stop tracking a story. + + Requires a valid JWT token in the Authorization header. + Deletes a tracked story for the authenticated user. + + Args: + story_id (str): The ID of the tracked story to delete. + + Returns: + dict: Contains success message. + int: HTTP 200 on success, 404 if story not found, 500 on error. + """ try: print(f"[DEBUG] [api_gateway] [delete_story_tracking] Called for story: {story_id}") - # Get the user ID from the token auth_header = request.headers.get('Authorization') token = auth_header.split()[1] print(f"[DEBUG] [api_gateway] [delete_story_tracking] Decoding token: {token[:10]}...") @@ -634,6 +810,14 @@ def delete(self, story_id): @app.route('/api/story_tracking', methods=['OPTIONS']) def story_tracking_options(): + """Handle OPTIONS requests for the story tracking endpoint. + + This function sets the necessary CORS headers for preflight requests + to the story tracking endpoint. + + Returns: + Response: A Flask response object with appropriate CORS headers. + """ print("[DEBUG] [api_gateway] [story_tracking_options] Called") response = make_response() response.headers.add("Access-Control-Allow-Origin", "*") diff --git a/backend/core/__pycache__/config.cpython-312.pyc b/backend/core/__pycache__/config.cpython-312.pyc index a3e04fcbd62f5bcab905b0d58e3b5591c20f3d71..551f6789539713c20d8e1c2b709f291deb6aa240 100644 GIT binary patch delta 220 zcmbOzv`L8fG%qg~0}$-=-JAY%BJT`FnT^YKF!GC}@TLl{W&+7DFfcMqZe*0S6-W_; ziA0H}2t|pdil+#tGN&=6h_tY*hA3r-l7NefB8f_F-pCZh#Hg~Fg*BOxQEPGrn+2oP zTZlX-=wLk?G`z9Ll_EjOw2lfCN|x0L2nBP5=M^ delta 212 zcmdlaG*O86G%qg~0}%Y)xjy~sMBW*UTpO3|V4U2-C}t~=BDk6fB*(xIC6Xc(C7LRh zBAm*c#*`w`!m=77!4M@57ZpVkmDqfkDTs+td9xU6G9#nr^WrfDD)(rrpNp>exO+fbS|sFB)X;<=>b_z~`m zNz>Y-i;&hxNYoluX|$mBv#b6Tl@E|Wf*&5cd^ikJ(gBGg#0TQXv|VYJ<%4_28Ix9A z*=NrCoO|!N=NOOvI?(rTpU(~OdjHsmv!^2fd`}2#**1`S|Dpg`2B)gM$A%?JqO;y< zFAbmz`fgwWS<`;#$iy4?W(Zj!VA0ru&2&|7wmsg8o!GOXTzER1P+SZjm3Y&XjNPCcL8n8IgKd&$KPIuv4gHp` zsn_7k^$ zaweFmoftJ(MMd&F+JSMz5rE49pzRnW85AS7%2bKUnAL1qH@>6b4LVKFf;4@` z0YI59(GtyLQI&q(WdmiVjAdn8i9r(&#SJmG8^)_+#Qst2 zvJLyTmu#cp%v~JYq57@dGC|^rca$8GlVtM`@OE>D6eu~g;Q%+;toX9Ct#S@*;BNRihhXP-?!QvMb-ViEsMlU6RMjzv;nMvS)bhF(wprNOt1e%B+!#&nx*X zn+C_zd`%5H)MELvvfI!L{LUx4+8n*)Qrnt2o4Gyv2f0bXx03fg(zu6VAzsT0o12BM%o8L6!`+0u=?W(tIBE?BJ%c7w>H(74czyFSl89~O1#$dl+GVm<5R1A>LXd_kFW8QtNdi`9`_QbJ|l}tmkU)O7FFE zdaQrFv-|4Bm5bL8=$-r4qwy=n>dWi#9ara8=B_`Y#|N|$x1*Qy^!Wb+JieAX54I%g z!xRW?t2w*=cXyHk(WBIV?@v*{%iRSu8_a%?uPx&$NNgqq#fa~x!=98gqhvUqraTeo-QxI|@E)yY1{Yd71GGRCbJfG5n zVAwJWZu_MD7|e{a2a$Ge*bBI3MA-<FshX)yc-UsGhxJgZ>PE3s*Oqo-!MG(Y%zMz`JNWn3hh1@7$ zQc3>=_b>$J2>Zp^zCTxxidlF7oySbFJH?A(ouMe|uC@|wW4;D1?cHe4__tu^4ON^Gg+kX2=wp=iv2LQty9q-`~O`Bz#o_}7nw(vwi5tcyimJ$hXu%XnZ1EXy!u?SZD4&4 zUkzniaGNyJpa*#Ei^9-&p9c{R*=4Fr*3;ldUaoK58lG(x&%1`_l$g`RvZikhmzC_R zxPF7{iYfLMlGY3h)es2NZ(qp9I%$*I_M0Pl|($I$U3FOlL_6C5TV!) z#l$fMvRaS*YkDIR^WS=E=gE&Num-KDFq(iR2!}XpScpLwfd~nG7%#=35&;Dll;?R^ zg?8NolQBU_4iAekIwq>ocnsE|^OjqOUG)Uv)4nO#QDg|R{jw?y%A+H3C@HWB$?Vhq zEI3r@K^V5UbO*tUzz$MCu}DN3?N@+gxoSxhD$p{nFhAx5AZ<};By6mdMIvula(Dws zlUg5H(mBK%I7DGh2^1Bg9DsNeP>T!^Hym*zD6r%=mv0>d@6x&wu%_t=Iw{{aJz}a7@ zT=5H0@$IPiztQXYl-(%5Bjp$fnI>2Efekpp29M;qxy}q7m2Ra~4#{ht<5sCQQ+FV& zUGR8o=@cpXZu)QJA1h(BZgun4woB$xj@KKNqI*C&*aj40#6o~xK%n1w;%v_evKNdH zYdL4@%yX4DW2peJChc71soc020To|+W2KyCnO^tZi{ec?PdZ2Q`v@@ta`q!d-MUSZ zkBZ45b<8cqIipGw{As$A?)PQJq z1Klbnkg#OT2;JLrVDH}6eO-O+Z9>i@vS!T%tJA}SQWWZxlpM~_uEJJq%a~iN?oeb! zK@&-cO1d4DLKckT7A!YGNQmk&ox&`gG4s(fzLG{t>Z3sz}vKliDKj(gEx$)K;yDX>UwrkfwRgY&_%iTCCMrRRqxw`P3XC+rKslDS}Q zHds4t&jxo+6g+l$&TpA4pLJDDRV)OGFOFUqo%Ctr6YsttLBYmFD<~<|?)l3+&#fS^ zd8%`|{d)I(u5qICq2}XiT7vv!ARCO#hf1&2T&|fsmZ{r)Kh%oI$?9yVW~w6_s-G{Z znoMU)8s-DxOUVl-=fjoP1}_gzZN49VOZ$qe9QQh^3;qTUmwV9!PFQ7DPE-ACF@=0}p zEj-tCrYplmrrx+sJYaX>R4N`b1?QY+oEffu`u*wVjIVW;+4H-*Y&^r1y=W&;u31;r z%NI>ZGUFY2leQ9Tq z{5eP?TxSGfSd7L5AwoPstwfKZJHqKUL6G7{1Oa}Cj-W^TAkbadfKV)s?-=5^weJI! zdm@;>a=-Cs58}IV9V{UTjBn^mrfE1XC5Pk|7)NK=i*g46lnz1sr5!IS*3K1GlaGi3 rjR=0U>6xXLXkH?LC9sIG#X|(3*om=wWbh&7dq8oEB%neth79} 0: - # Article already exists; return its id. + # Article already exists; return its id return existing.data[0]["id"] else: - # Insert a new article. + # Insert a new article with all available fields result = supabase.table("news_articles").insert({ "title": article["title"], "summary": article.get("summary", ""), "content": article.get("content", ""), - # The source can be a dict (from API) or a plain string. + # Handle source field which can be a dict (from API) or a plain string "source": article["source"]["name"] if isinstance(article.get("source"), dict) else article["source"], "published_at": article["publishedAt"], "url": article["url"], @@ -39,11 +73,26 @@ def store_article_in_supabase(article): def log_user_search(user_id, news_id, session_id): """ Logs a search event by inserting a record into the user_search_history join table. + + This function creates a record of a user viewing or searching for a specific article, + which can be used for analytics, personalization, and tracking user activity across sessions. + + Args: + user_id (str): The ID of the user performing the search + news_id (str): The ID of the news article that was viewed/searched + session_id (str): The current session identifier for tracking user activity + + Returns: + dict: The Supabase response object containing the result of the insert operation """ + # Create a timestamp for when the search occurred + current_time = datetime.datetime.utcnow().isoformat() + + # Insert the search record with all required fields result = supabase.table("user_search_history").insert({ "user_id": user_id, "news_id": news_id, - "searched_at": datetime.datetime.utcnow().isoformat(), + "searched_at": current_time, "session_id": session_id, }).execute() return result @@ -51,25 +100,56 @@ def log_user_search(user_id, news_id, session_id): def add_bookmark(user_id, news_id): """ Adds a bookmark by inserting a record into the user_bookmarks table. - Returns the created bookmark record if successful. + + This function creates a bookmark relationship between a user and a news article, + allowing users to save articles for later reading. + + Args: + user_id (str): The ID of the user adding the bookmark + news_id (str): The ID of the news article to bookmark + + Returns: + dict or None: The created bookmark record if successful, None otherwise + + Raises: + Exception: If there's an error during the database operation """ try: + # Insert a new bookmark record linking user to article result = supabase.table("user_bookmarks").insert({ "user_id": user_id, "news_id": news_id, }).execute() + + # Return the first data item if available, otherwise None return result.data[0] if result.data else None except Exception as e: print(f"Error adding bookmark: {str(e)}") + # Re-raise the exception for proper error handling upstream raise e def get_user_bookmarks(user_id): """ Retrieves all bookmarked articles for a user with full article details. - Returns a list of bookmarked articles with their details. + + This function performs a join between the user_bookmarks table and the news_articles table + to retrieve complete article information for all articles bookmarked by the specified user. + The results are transformed into a more user-friendly format where each article includes its + bookmark_id for reference. + + Args: + user_id (str): The ID of the user whose bookmarks should be retrieved + + Returns: + list: A list of dictionaries, each containing the full details of a bookmarked article + with an additional 'bookmark_id' field + + Raises: + Exception: If there's an error during the database operation """ try: # Query user_bookmarks and join with news_articles to get full article details + # This uses Supabase's foreign key relationships to perform the join result = supabase.table("user_bookmarks") \ .select( "id," @@ -78,26 +158,51 @@ def get_user_bookmarks(user_id): .eq("user_id", user_id) \ .execute() - # Transform the result to a more friendly format + # Transform the nested result structure to a more friendly format + # by flattening the news_articles data and adding the bookmark_id bookmarks = [] for item in result.data: article = item["news_articles"] - article["bookmark_id"] = item["id"] + article["bookmark_id"] = item["id"] # Add bookmark ID to article for reference bookmarks.append(article) return bookmarks except Exception as e: print(f"Error fetching bookmarks: {str(e)}") + # Re-raise the exception for proper error handling upstream raise e def delete_bookmark(user_id, bookmark_id): """ Deletes a bookmark from the user_bookmarks table. - Returns True if successful, False otherwise. + + This function removes a bookmark relationship between a user and an article. + It ensures that users can only delete their own bookmarks by checking both the + bookmark_id and user_id in the query. + + Args: + user_id (str): The ID of the user who owns the bookmark + bookmark_id (str): The ID of the bookmark to delete + + Returns: + bool: True if the bookmark was successfully deleted, False if no bookmark was found + or if the deletion was unsuccessful + + Raises: + Exception: If there's an error during the database operation """ try: - result = supabase.table("user_bookmarks").delete().eq("id", bookmark_id).eq("user_id", user_id).execute() + # Delete the bookmark, ensuring it belongs to the specified user + # This double condition prevents users from deleting other users' bookmarks + result = supabase.table("user_bookmarks") \ + .delete() \ + .eq("id", bookmark_id) \ + .eq("user_id", user_id) \ + .execute() + + # Return True if at least one record was deleted, False otherwise return len(result.data) > 0 except Exception as e: print(f"Error deleting bookmark: {str(e)}") + # Re-raise the exception for proper error handling upstream raise e \ No newline at end of file diff --git a/backend/microservices/nope.env b/backend/microservices/nope.env deleted file mode 100644 index 9e0a1db..0000000 --- a/backend/microservices/nope.env +++ /dev/null @@ -1,17 +0,0 @@ -# API Configuration -API_HOST=localhost -API_PORT=5001 - -# CORS Configurationasd -CORS_ORIGINS=http://localhost:5173,http://localhost:3000,http://localhost:5001 - -# Redis Configuration -REDIS_HOST=localhost -REDIS_PORT=6379 - -# API Keys -NEWS_API_KEY=4b94554081e148bc964e4ab94c9dc0fe -OPENAI_API_KEY=your_openai_api_key_here - -# Logging -LOG_LEVEL=INFO \ No newline at end of file diff --git a/backend/microservices/story_tracking_service.py b/backend/microservices/story_tracking_service.py index caafb48..8d229f8 100755 --- a/backend/microservices/story_tracking_service.py +++ b/backend/microservices/story_tracking_service.py @@ -1,7 +1,27 @@ #!/usr/bin/env python3 """ story_tracking_service.py - Microservice for Story Tracking -Provides functionality for tracking news stories by keyword and finding related articles. + +This service provides functionality for tracking news stories by keyword and finding related articles. +It integrates with Supabase for data persistence and manages user story tracking preferences. + +Key Features: +- Story tracking by keywords +- Related article discovery +- User story management +- Automatic story updates + +The service uses clustering algorithms to group similar articles and maintains +relationships between tracked stories and their associated articles. + +Database Tables Used: +- tracked_stories: Stores user story tracking preferences +- tracked_story_articles: Links stories to their related articles +- news_articles: Stores article content and metadata + +Environment Variables Required: +- VITE_SUPABASE_URL: Supabase project URL +- SUPABASE_SERVICE_ROLE_KEY: Service role key for admin access """ import os @@ -11,25 +31,41 @@ from summarization.story_tracking.story_tracking import cluster_articles from backend.microservices.news_fetcher import fetch_news +# Service initialization logging print("[DEBUG] [story_tracking_service] [main] Story tracking service starting...") -# Load environment variables +# Load environment variables from .env file load_dotenv() print("[DEBUG] [story_tracking_service] [main] Environment variables loaded") # Initialize Supabase client with service role key for admin access to bypass RLS +# RLS (Row Level Security) policies are bypassed when using the service role key SUPABASE_URL = os.getenv("VITE_SUPABASE_URL") -# Use service role key instead of anon key to bypass RLS policies SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY") print(f"[DEBUG] [story_tracking_service] [main] Supabase URL: {SUPABASE_URL}") print(f"[DEBUG] [story_tracking_service] [main] Supabase Key: {SUPABASE_SERVICE_KEY[:5]}..." if SUPABASE_SERVICE_KEY else "[DEBUG] [story_tracking_service] [main] Supabase Key: None") +# Create Supabase client for database operations supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY) print("[DEBUG] [story_tracking_service] [main] Supabase client initialized") def run_story_tracking(article_embeddings): + """ + Runs the story tracking algorithm on a set of article embeddings to identify story clusters. + + This function uses clustering algorithms to group similar articles together based on their + vector embeddings, helping to identify distinct news stories or topics. + + Args: + article_embeddings: List of vector embeddings for articles. Each embedding should be + a numerical vector representing the article's content. + + Returns: + list: A list of cluster labels indicating which story cluster each article belongs to. + Empty list is returned if article_embeddings is None or empty. + """ print(f"[DEBUG] [story_tracking_service] [run_story_tracking] Running story tracking with {len(article_embeddings) if article_embeddings else 0} embeddings") labels = cluster_articles(article_embeddings) print(f"[DEBUG] [story_tracking_service] [run_story_tracking] Clustering complete, found {len(labels) if labels else 0} labels") @@ -333,8 +369,14 @@ def update_all_tracked_stories(): """ Background job to update all tracked stories with new related articles. + This function is designed to be run as a scheduled task to keep all tracked stories + up-to-date with the latest news articles. It iterates through all tracked stories in the + database and calls find_related_articles() for each one to fetch and link new articles. + Returns: - Dictionary with counts of stories updated and new articles found + dict: A dictionary containing statistics about the update operation: + - stories_updated: Number of stories that received new articles + - new_articles: Total number of new articles added across all stories """ print(f"[DEBUG] [story_tracking_service] [update_all_tracked_stories] Starting update of all tracked stories") try: @@ -374,7 +416,7 @@ def update_all_tracked_stories(): raise e if __name__ == '__main__': - # Example usage + # Example usage - this code runs when the script is executed directly print("[DEBUG] [story_tracking_service] [main] Running story_tracking_service.py as main") result = update_all_tracked_stories() print(f"[DEBUG] [story_tracking_service] [main] Updated {result['stories_updated']} stories with {result['new_articles']} new articles") diff --git a/backend/microservices/summarization_service.py b/backend/microservices/summarization_service.py index 52c123b..c6074da 100755 --- a/backend/microservices/summarization_service.py +++ b/backend/microservices/summarization_service.py @@ -1,6 +1,15 @@ #!/usr/bin/env python3 """ -summarization_service.py - Microservice for Summarization +Summarization Service Module + +This module provides functionality for fetching, processing, and summarizing news articles. +It includes capabilities for content extraction, text summarization, and keyword extraction. + +Key Features: +- Article content fetching from URLs +- Text summarization using OpenAI's GPT models +- Keyword extraction using YAKE +- Integration with Supabase for data persistence """ import json @@ -31,6 +40,19 @@ @log_exception(logger) def fetch_article_content(url): + """ + Fetches and extracts the main content from a given URL. + + Args: + url (str): The URL of the article to fetch content from. + + Returns: + str or None: The extracted article content as plain text. + Returns None if the fetch fails or content is invalid. + + Raises: + Various requests exceptions are caught and logged internally. + """ try: # Check if URL is valid if not url or not url.startswith('http'): @@ -72,6 +94,21 @@ def fetch_article_content(url): @log_exception(logger) def run_summarization(text): + """ + Generates a concise summary of the provided text using OpenAI's GPT model. + + Args: + text (str): The input text to be summarized. + + Returns: + str: A summarized version of the input text (approximately 150 words). + Returns an error message if summarization fails. + + Note: + Uses OpenAI's GPT-4 model with specific parameters for optimal summarization: + - Temperature: 0.5 (balanced between creativity and consistency) + - Max tokens: 200 (ensures concise output) + """ try: return "Summarized text here" @@ -91,7 +128,17 @@ def run_summarization(text): return "Error generating summary" @log_exception(logger) -def get_keywords(text,num_keywords=1): +def get_keywords(text, num_keywords=1): + """ + Extracts key phrases from the input text using YAKE keyword extraction. + + Args: + text (str): The input text to extract keywords from. + num_keywords (int, optional): Number of keywords to extract. Defaults to 1. + + Returns: + list: A list of extracted keywords/key phrases. + """ kw_extractor = yake.KeywordExtractor(top=num_keywords, lan='en') keywords = kw_extractor.extract_keywords(text) return [kw[0] for kw in keywords] @@ -99,6 +146,28 @@ def get_keywords(text,num_keywords=1): @log_exception(logger) def process_articles(session_id): + """ + Processes a batch of articles associated with a specific session ID. + + This function performs the following operations: + 1. Retrieves articles from Supabase based on the session ID + 2. Fetches missing content for articles if needed + 3. Generates summaries for each article + 4. Extracts keywords for filtering + + Args: + session_id (str): The unique identifier for the user session. + + Returns: + list: A list of dictionaries containing processed article data including: + - Basic article metadata (title, author, source, etc.) + - Generated summary + - Extracted keywords + - Original and fetched content + + Raises: + Exception: If there's an error during processing, it's logged and re-raised. + """ try: # Query only articles that belong to the current session. # result = supabase.table("news_articles").select("*").eq("session_id", session_id).execute() From 61cf18ada1e1dd31d593a7010bf7742a91140776 Mon Sep 17 00:00:00 2001 From: Akalpit Dawkhar Date: Sun, 2 Mar 2025 13:40:04 -0500 Subject: [PATCH 04/12] Cleaned up the repository --- .DS_Store | Bin 10244 -> 10244 bytes backend/.DS_Store | Bin 6148 -> 6148 bytes .../summarization_service.cpython-312.pyc | Bin 6524 -> 8893 bytes backend/microservices/nope.env | 17 +++++ backend/realtime/realtime.py | 23 ------- {processing => data}/.DS_Store | Bin 6148 -> 6148 bytes frontend/.DS_Store | Bin 6148 -> 0 bytes frontend/mobile/README.md | 12 ---- frontend/web/app.js | 6 -- frontend/web/index.html | 42 ------------ frontend/web/style.css | 18 ------ ingestion/.DS_Store | Bin 6148 -> 0 bytes ingestion/api_connectors/api_connector.py | 30 --------- ingestion/rss_reader/rss_reader.py | 23 ------- ingestion/scraper/scraper.py | 33 ---------- monitoring/.DS_Store | Bin 6148 -> 6148 bytes monitoring/ci_cd/Dockerfile | 29 --------- monitoring/ci_cd/docker-compose.yml | 60 ------------------ processing/caching/cache.py | 32 ---------- processing/preprocessing/preprocess.py | 20 ------ processing/storage/config.py | 19 ------ run.sh | 6 -- summarization/.DS_Store | Bin 6148 -> 0 bytes .../story_tracking/story_tracking.py | 29 --------- .../summarization_service/summarize.py | 22 ------- summarization/summary_cache/summary_cache.py | 33 ---------- tests/.DS_Store | Bin 6148 -> 0 bytes tests/test_sample.py | 13 ---- 28 files changed, 17 insertions(+), 450 deletions(-) create mode 100644 backend/microservices/nope.env delete mode 100644 backend/realtime/realtime.py rename {processing => data}/.DS_Store (79%) delete mode 100644 frontend/.DS_Store delete mode 100644 frontend/mobile/README.md delete mode 100644 frontend/web/app.js delete mode 100644 frontend/web/index.html delete mode 100644 frontend/web/style.css delete mode 100644 ingestion/.DS_Store delete mode 100755 ingestion/api_connectors/api_connector.py delete mode 100755 ingestion/rss_reader/rss_reader.py delete mode 100755 ingestion/scraper/scraper.py delete mode 100644 monitoring/ci_cd/Dockerfile delete mode 100644 monitoring/ci_cd/docker-compose.yml delete mode 100755 processing/caching/cache.py delete mode 100755 processing/preprocessing/preprocess.py delete mode 100644 processing/storage/config.py delete mode 100644 run.sh delete mode 100644 summarization/.DS_Store delete mode 100755 summarization/story_tracking/story_tracking.py delete mode 100755 summarization/summarization_service/summarize.py delete mode 100755 summarization/summary_cache/summary_cache.py delete mode 100644 tests/.DS_Store delete mode 100755 tests/test_sample.py diff --git a/.DS_Store b/.DS_Store index 04725a54f9a5351183586519b5658fed14417ec3..58839f9fdc6c790f7f127e5f3a68a7b7d3bda865 100644 GIT binary patch delta 305 zcmZn(XbG6$&uF+YU^hRb;btCz15B2Q3?&SSo;mr+NjdpR3=9kcKTmV(KV{(FEA-6cFf(u`e`zYEIqb|dSVyi;i7W)@K{ zMtN3-Jcd+;a)x3I3)eF+F!D_{7gLz*F3LCgz7XqXGcg|~b77#;QicMc-AO=P4AiUu zBnugGfOG*vK9J62NM@)+bv>i(a%`!PfRDAMc(QFc3pvMFN DO}S2! delta 437 zcmZn(XbG6$&uF$WU^hRb*=8Pr15A_e2+!k8V<=+CXUJnHnXDivQqL&Mz`&sQ9}Ivj z1_llWxB`Y$pt6*lbi?4}{M-VtEQ6vbHZ{5VE-pzq`AHx-<4w2j+*^0d5xcq+RCR(F z3MWq#5}8~t%39CKkO{Ot9q5Q+h7uqf=w{EH{A8##0t^g{-9RglosLBZs^htwfjY3L zLU#U%?Wx{7mmPET$E6hE1DI0mDho1@!_8oGiHHE> MbA<>GO#C4u05LmusQ>@~ diff --git a/backend/.DS_Store b/backend/.DS_Store index 5ef017d85d367b114ab8e89fb67ba494252212fa..58da69c9c920f36cc6943f673cf31af787b26ef6 100644 GIT binary patch delta 28 kcmZoMXfc@J&&abeU^g=(&t@K$cT5{gco{ddbNuB80D>9_)c^nh delta 225 zcmZoMXfc@J&nUPtU^g?P;AS3{cTDvh3`Gp742cXm3?&Si47m)co;mr+NjdpR3=9kc z3=E92Kw9rV7ywxe3>fNj(hY-?^K%QpvJ6ZPKpI0$ZoZ2P)J%?i<7&U1%Z@oh)VZQs ao|~V7Lm{fVf(&F!1tu1HZf58B%MSn^hB!F@ diff --git a/backend/microservices/__pycache__/summarization_service.cpython-312.pyc b/backend/microservices/__pycache__/summarization_service.cpython-312.pyc index d0e0e7446022c7fb43d31485cac8a24f0627f9e5..5203afd1b25f7da9c48aae098400c724886c1f4d 100644 GIT binary patch delta 2971 zcmZ`*O>7%Q6!v+ zZ~mS6W^VY$p`l_9KMy|pu6p;#SHpi=G5gTU<%*Z1dYy%0jVTfMaG8fq;c&PRxRJ+; z#pRliP}eum2!p0@xr9pOJG7d4Lbah1giztisR_S2L5mzNrM_mq3o;Fle}K5MIo2v;v~$P#GSx#Ry0(?pbl4-&P!ai{74FjXg6@N!Ts6!qY}M*Jc;y@cBG$q8ggswuGv|ezKoUM)nqyzwH51o$~iG(h|alN2`zs>Y$tgx_hrZa0IfXmX$0j#$yl zvtbo+qAU7Dp$B79h2v#d#sF8A;`0zxbg+~`paR3YErHWuOB`|cd*Wdzz=kgdJ~t85 z`na1hvJ-|$Xn0gG(XEtK6q~xz?=-%S2g1~)`2-avJV~iC0;7P0-$bprS#3;6qGT5E zD#|a2B!v8(h)X50bq;Tkz(_3|r4dzY>NVX}FQ`_rWP!8_QGZ_B)c1R#rc`5XblxPQ zu9ArAOqF4A{lEUd@<-KC#HbpE5|!98&EQMnGZY+5zIPz8mn@GI&(B5Y7KX~ zxy3;7F05A&&)AlryG_ms*aYSjjaq* zr4ul=!aRoV;bO;Bi=(QX5RFeX$(~7zh+OJ0?ePxnCQEREt)hxRbLcH&0@m8@%m!s^ko$B(E8M9iKD-2zi(q*F+n3RDdkmRo68Uxu#$%bfX zam=4F=`|hnl+UI*%_H&-iA_bsDuL$(Et-IVW*ym*VdHcOP$!|_O)4WD>jvu1l%rAS z0*aABb#v>iGo{YJs0)2$=y)a_09c%bMA7-2yWG{U9VKF*WMCaC_%aU6;tc7VW&?D=Cp(>x^|!gPr_*?(Mht!=q60RFs6j^I3AKB zRSUuikUX?IuQtzgp4`h1Sudqe@M;sfuPj*BNcw~>H{QzU3WImnezu?ftuXp) O;lPHSD;x@2Sla(MV512D delta 580 zcmdn%`p1axG%qg~0}$-=-J2dMwvq1zqfM1HS8-`>ZemepRbokIeqMZWYEfBca;jcI zrGl=4Z)S2)K2$^@Ex$-17+LM+HfDK7R)!Sj6qd=WSk#$W*RV~#!s5-yK3RcPk&$Dv z6RWN$XAQ$_hPh0u8A0|kFfcMyGNf=#ZeiUo%8f1)36z^`z%DqKMAwFM9z>|fCtF?z|6?V_=$yyk>xYTN`^Y57s0Eb)vOcQ8Bt9`QGpa!)fUJ)^Fo};$ mri=nm2_Wl30V4woPe;`yX33i@B3D_&J}@(|NEMX>-30)%>6t_T diff --git a/backend/microservices/nope.env b/backend/microservices/nope.env new file mode 100644 index 0000000..9e0a1db --- /dev/null +++ b/backend/microservices/nope.env @@ -0,0 +1,17 @@ +# API Configuration +API_HOST=localhost +API_PORT=5001 + +# CORS Configurationasd +CORS_ORIGINS=http://localhost:5173,http://localhost:3000,http://localhost:5001 + +# Redis Configuration +REDIS_HOST=localhost +REDIS_PORT=6379 + +# API Keys +NEWS_API_KEY=4b94554081e148bc964e4ab94c9dc0fe +OPENAI_API_KEY=your_openai_api_key_here + +# Logging +LOG_LEVEL=INFO \ No newline at end of file diff --git a/backend/realtime/realtime.py b/backend/realtime/realtime.py deleted file mode 100644 index b899649..0000000 --- a/backend/realtime/realtime.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 -""" -realtime.py - Real-time Communication Module -This example uses Flask-SocketIO to enable real-time updates. -""" - -from flask import Flask, render_template -from flask_socketio import SocketIO, emit - -app = Flask(__name__) -app.config['SECRET_KEY'] = 'secret!' -socketio = SocketIO(app) - -@app.route('/') -def index(): - return "Real-time communication server running." - -@socketio.on('connect') -def test_connect(): - emit('message', {'data': 'Connected to real-time server.'}) - -if __name__ == '__main__': - socketio.run(app, host='0.0.0.0', port=6000) diff --git a/processing/.DS_Store b/data/.DS_Store similarity index 79% rename from processing/.DS_Store rename to data/.DS_Store index 53efefbe576a43793b232412f5888af5701f4a07..d6eb9db52e7fe607db56da77d4e4e28b11a45eae 100644 GIT binary patch delta 410 zcmZoMXfc@JFUrcmz`)4BAi&_6lb@WFlb;0S3vB+#yqvKfB*nsz$B@cU&QOdj4HS0- zV!i)h0AwMlODZoefEu$SsURn_xWvHV8Y2@k3o9Et2RjEhM{ICLetB?7Vo7PSQ({px zh!>KdpOXY*Cnkkurk2MGh}1jh=anR8=A{;ab!4Wb0@cKXXXd5kmpkQ`=A{&aH3vf^ zI5;^t;{_zDs|^iIbQDa?Eo*fYsx6HSfNW#4+FDKyQDuGWp!n>Z+`RlQu#*`Wf&K;q zUMLNtx`7N9hHR+M%YuvYa`N-ifr20nTnxnwr3|@1n8;AXkjYR544)LV&{+>uha5Us V^&p!I3~(`)i4C=z**X650|2>;Yw`d9 literal 6148 zcmeHKy-veG4ED8^8U#ok8S@HdV?YX3m>4^;(VwU-YDooj&kOJxyaO{5&%leYB0k&K zCTY^D6GF(Y>@RWbJKvWy&JmI8t;SuV4iN=V#>Np$jqr8WiZs0E1n6Xp89mTsKI!&H zX(ieme~|&ccWoNc06L|@{aY-x+~&L*u}I^1I8TST5`3QD-@LtkT-8O54~SY$`Dt0t z#?K+0(vlu25u2i}gr4xLsjYFbKe|}w_vP)2uXE4%i!`tKu#B4e+o<-LQmkP>6H4^| zuwJ2i7p6)P#X47Cvz*VZsJyn_Q8o22rzXaLF<=Z789>iw362%b8Ux0FF|c8P-wz>_ zG4hxx%BurLwgA8u+#)!ZZ#ZDc4q)UlQ-lZNG!>|+&TlcCro$ilxX5FssOjWnW{l%! zc7BKAWOn#N2`3jRnl%QDfieS4J?(J+zuvw7FDKcVF<=b*D+XLE?#DeW$@kXE;<(p_ r&^ss#$7PC}6dXt^Ml84DGpGpsAs+xEkC`Go5c?1i8q632zskT5{*ins diff --git a/frontend/.DS_Store b/frontend/.DS_Store deleted file mode 100644 index 406d4378509e0e31f29b7e1cbf38c4c2e10f1dfd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKy-veG47O6+O`4`T6>*zwOGhYPt&cqCUC2dU<|38^+Y%#%%8Qc@q9l{0ws`svAP zKV9r%S~8hyfTA_J*)P*iwO(%IS*1mJWMJaWP?UXz9dS8T+KlyUPn}b=ZT3 z6GuazodIVcXJBH>x%B^Yd@`e#{5-{1&VV!U#~9#gxhj` - - - News Aggregator - - - -

News Aggregator

-
-
-

Test Article

- -

- -
-
-
- - - - diff --git a/frontend/web/style.css b/frontend/web/style.css deleted file mode 100644 index 06442d3..0000000 --- a/frontend/web/style.css +++ /dev/null @@ -1,18 +0,0 @@ -/* style.css - Basic styling for the web interface */ -body { - font-family: Arial, sans-serif; - margin: 0; - padding: 0; - background-color: #f5f5f5; -} - -header { - background-color: #004080; - color: #fff; - padding: 20px; - text-align: center; -} - -main { - padding: 20px; -} diff --git a/ingestion/.DS_Store b/ingestion/.DS_Store deleted file mode 100644 index 5961be233457e5160c2c181c717d8c31d3cfe4ea..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKK~95E5S?F0Bxp>RE;)dk-T)h1x$FU|g=$TTkg9u5)0G!6aqTTUgz*Bt`SWdQ zfe<&w=nVPuKEn(=Uh)|}5t;F0HX<4lQGmhdhj1;%*LgH-;37vrWABktL3cRjls2+8 zpbDr0zfA#NyT^1zCEd~#t9HA7A246h{qklsNsGJ`;@*2-eqJQWY+1~Z7rcF3Up%j0 z&-!b!Z`YKIIxqfyQOJC8Oo4N}QwG1<)_dl~mTzBf3(o^^Ou>;= zZ!q+pz;G|4&g~`KXxhlyvg*!i;ltXg{XYfJv)O_}LG`MDDxeB%6yWbeguxg)tOV_+ z1I}&%fIh<3Ft@!J7;yj?JFEl|ff*?UN~v*M3?t=;M=mdRSP4ow8T(}1$35A&9g49} zM?A9WWMV<}s(>o6tAHn`L+=0Q-TVLDB0W6vTSDO_{$Ff_s1s( diff --git a/monitoring/ci_cd/Dockerfile b/monitoring/ci_cd/Dockerfile deleted file mode 100644 index 1b72406..0000000 --- a/monitoring/ci_cd/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# Dockerfile - Container image for the News Aggregator backend services -FROM python:3.9-slim - -# Set working directory -WORKDIR /app - -# Install curl for health checks -RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/* - -# Copy requirements file and install dependencies -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the entire project -COPY . . - -# Expose port 5000 for the API gateway -EXPOSE 5000 - -# Add health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD curl -f http://localhost:5000/health || exit 1 - -# Set resource limits -ENV PYTHONUNBUFFERED=1 -ENV PYTHONIOENCODING=UTF-8 - -# Command to run the API gateway -CMD ["python", "news-aggregator/backend/api_gateway/api_gateway.py"] diff --git a/monitoring/ci_cd/docker-compose.yml b/monitoring/ci_cd/docker-compose.yml deleted file mode 100644 index 1e7edec..0000000 --- a/monitoring/ci_cd/docker-compose.yml +++ /dev/null @@ -1,60 +0,0 @@ -# docker-compose.yml - Compose file to run multiple services -version: '3.8' - -services: - api_gateway: - build: . - container_name: api_gateway - ports: - - "5000:5000" - environment: - - FLASK_ENV=production - - API_HOST=0.0.0.0 - - API_PORT=5000 - - CORS_ORIGINS=http://localhost:3000,http://localhost:5173 - - LOG_LEVEL=INFO - volumes: - - ./data:/app/data - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5000/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 5s - deploy: - resources: - limits: - cpus: '0.50' - memory: 512M - reservations: - cpus: '0.25' - memory: 256M - restart: unless-stopped - - realtime: - build: - context: . - dockerfile: Dockerfile - container_name: realtime_service - ports: - - "6000:6000" - environment: - - FLASK_ENV=production - - API_HOST=0.0.0.0 - - API_PORT=6000 - - LOG_LEVEL=INFO - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:6000"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 5s - deploy: - resources: - limits: - cpus: '0.50' - memory: 512M - reservations: - cpus: '0.25' - memory: 256M - restart: unless-stopped diff --git a/processing/caching/cache.py b/processing/caching/cache.py deleted file mode 100755 index 639f4b0..0000000 --- a/processing/caching/cache.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -""" -cache.py - Caching module. -This script sets up Redis caching to store frequently accessed data. -""" - -import redis - -# Configure Redis client -redis_client = redis.Redis(host='localhost', port=6379, db=0) - -def set_cache(key, value, expire=3600): - """ - Stores a key-value pair in Redis cache with expiration. - :param key: Cache key. - :param value: Value to cache. - :param expire: Expiration time in seconds (default: 1 hour). - """ - redis_client.setex(key, expire, value) - -def get_cache(key): - """ - Retrieves a value from Redis cache by key. - :param key: Cache key. - :return: Cached value or None. - """ - return redis_client.get(key) - -if __name__ == '__main__': - set_cache('sample_key', 'sample_value') - cached = get_cache('sample_key') - print("Cached Value:", cached.decode('utf-8') if cached else "None") diff --git a/processing/preprocessing/preprocess.py b/processing/preprocessing/preprocess.py deleted file mode 100755 index 47d1e81..0000000 --- a/processing/preprocessing/preprocess.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 -""" -preprocess.py - Data preprocessing module. -This script includes functions for cleaning raw data, -removing unwanted HTML, and normalizing text. -""" - -def clean_data(raw_data): - """ - Cleans raw text data. - :param raw_data: The raw text to be cleaned. - :return: Cleaned text. - """ - # TODO: Implement more sophisticated cleaning if needed. - return raw_data.strip() - -if __name__ == '__main__': - sample_data = " Sample News Content " - cleaned = clean_data(sample_data) - print("Cleaned Data:", cleaned) diff --git a/processing/storage/config.py b/processing/storage/config.py deleted file mode 100644 index 1bbe4a7..0000000 --- a/processing/storage/config.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -config.py - Storage configuration module. -This file contains configuration settings for databases and blob storage. -""" - -# Database configuration (example: PostgreSQL) -DATABASE_CONFIG = { - "host": "localhost", - "port": 5432, - "user": "news_user", - "password": "password", - "database": "news_db" -} - -# Blob storage configuration (example: AWS S3) -S3_CONFIG = { - "bucket_name": "news-aggregator-bucket", - "region": "us-east-1" -} diff --git a/run.sh b/run.sh deleted file mode 100644 index 23b21e9..0000000 --- a/run.sh +++ /dev/null @@ -1,6 +0,0 @@ - # Create and activate a virtual environment (recommended) -python -m venv venv -source venv/bin/activate # On Windows, use: venv\Scripts\activate - -# Install dependencies -pip install -r /Users/akalpitdawkhar/prog_news/news-aggregator/requirements.txt \ No newline at end of file diff --git a/summarization/.DS_Store b/summarization/.DS_Store deleted file mode 100644 index 1724e27ebca9b3b19e1782286d5e540d96875730..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}&BV5S|qd#J_Mc@z_`34JxTuW5U@W6az$Ri2_kieH!1v2hbPcAxu2^&CCkL zB8UfK%naH6+UZQU-?!NvmWW(^+^G>&iKv3cSl&gq#P~V4g6(+EKG4ZK9;itpx}_Gy zg=lk_0;a%UQ-Ig*A>CosrdYT6^?OZyx}ol%TWiE=GSm+*y{wB*(kN;V(l+8M_ve$d z=cm_`^*No5bB4YAck%v3N8mB01iu!z^yv{e0;b^6#Iq}SC%BL4>IEvW&yvL91{0YU$=G$XV#U#5k1x$f| zrGP6(ji?SvzP1*~ajgx|F45RHE>n~gbiN$R1()JuG%-92_yQPu%oO2)*&hLs!8%i5 Hs|tJogT@qS>_5|vk6r=K{x>Ypi*d>iqZ`dA}8P&+<_$<&cz##X&vIE z-GC69sqE+2Z=9E>#*T@|4AzrAQICi^D2(kcszC5KmrAr;4<{Qt$2BczGN1O-ab96- zhdQ7R{51#o-94lc4QWhwyD;n&%@hI%(_jjDUXc52(!_Z~%KYN9R~j zYjr>!PzN>+@cIy;FvboGLA!LIvPS@58{Ib0=1&UFi5$ib3qgz^jHCi3)wm;uk#zJ& zju$&D1SOq}JA4>-XX8#N#@-$CN0v?|7SviDPzTBm9LQyl_y3Ff^M5%>uhao`;9ogl z+DV!W@JMcNtvnp>wGqk{3J3EFL7RfgZO693Tk$E14U9$H0LBgrLG&Q>L%`agl{)aN G4txSBo1pgq diff --git a/tests/test_sample.py b/tests/test_sample.py deleted file mode 100755 index b48121a..0000000 --- a/tests/test_sample.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -""" -test_sample.py - Sample test file for the News Aggregator Project. -This file contains a simple test to verify the testing setup. -""" - -def test_example(): - # Basic arithmetic test. - assert 1 + 1 == 2 - -if __name__ == '__main__': - import pytest - pytest.main() From c13742f4c5ac8f81317cca739900d78508753559 Mon Sep 17 00:00:00 2001 From: Rishabh Shah Date: Sun, 2 Mar 2025 23:38:44 -0500 Subject: [PATCH 05/12] reduced the number of articles fetched to 1 --- backend/api_gateway/api_gateway.py | 2 ++ backend/microservices/news_fetcher.py | 2 +- backend/microservices/story_tracking_service.py | 8 +++++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index f4625b9..6129193 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -54,6 +54,8 @@ from backend.core.utils import setup_logger, log_exception from backend.microservices.auth_service import load_users from backend.microservices.news_storage import store_article_in_supabase, log_user_search, add_bookmark, get_user_bookmarks, delete_bookmark +from backend.microservices.story_tracking_service import get_tracked_stories, create_tracked_story, get_story_details, delete_tracked_story + # Initialize logger for the API Gateway logger = setup_logger(__name__) diff --git a/backend/microservices/news_fetcher.py b/backend/microservices/news_fetcher.py index c52a04f..b5ab9d9 100644 --- a/backend/microservices/news_fetcher.py +++ b/backend/microservices/news_fetcher.py @@ -55,7 +55,7 @@ def fetch_news(keyword='', session_id=None): params = { 'q': keyword, # Search query parameter 'apiKey': NEWS_API_KEY, - 'pageSize': 10 # Limit results to 10 articles per request + 'pageSize': 1 # Limit results to 10 articles per request } try: diff --git a/backend/microservices/story_tracking_service.py b/backend/microservices/story_tracking_service.py index 8d229f8..89b4975 100755 --- a/backend/microservices/story_tracking_service.py +++ b/backend/microservices/story_tracking_service.py @@ -24,11 +24,13 @@ - SUPABASE_SERVICE_ROLE_KEY: Service role key for admin access """ +#TODO: Implement proper background processing: Use a task queue like Celery to handle article fetching in the background + import os import datetime from supabase import create_client, Client from dotenv import load_dotenv -from summarization.story_tracking.story_tracking import cluster_articles +# from summarization.story_tracking.story_tracking import cluster_articles from backend.microservices.news_fetcher import fetch_news # Service initialization logging @@ -125,8 +127,8 @@ def create_tracked_story(user_id, keyword, source_article_id=None): "added_at": datetime.datetime.utcnow().isoformat() }).execute() - # Immediately find and add related articles - print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Finding related articles for new tracked story") + # Log that we're skipping synchronous article fetching + print(f"[DEBUG] [story_tracking_service] [create_tracked_story] Skipping synchronous article fetching to avoid resource contention") find_related_articles(tracked_story["id"], keyword) return tracked_story From 5f4ac0f5b4445622de9a70c1e274a83b67cffb12 Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Wed, 5 Mar 2025 19:57:34 -0500 Subject: [PATCH 06/12] Added Dockerfile --- Dockerfile | 22 ++++++++++++++++++ backend/api_gateway/api_gateway.py | 6 +++-- .../summarization_service.cpython-312.pyc | Bin 8893 -> 8893 bytes requirements.txt | 11 +++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1171abd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +# Use an official Python runtime as a parent image +FROM python:3.9-slim + +# Set environment variable to force Python output to be unbuffered +ENV PYTHONUNBUFFERED=1 + +# Set the working directory to /app +WORKDIR /app + +# Copy requirements.txt and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the entire project into the container +COPY . . + +# Expose port 5000 (Cloud Run sets the PORT env variable) +EXPOSE 5000 + +# Run the API Gateway. +# Cloud Run will set PORT, so we use that environment variable. +CMD ["sh", "-c", "python backend/api_gateway/api_gateway.py ${PORT:-5000}"] \ No newline at end of file diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index 6129193..d1a7ff5 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -829,6 +829,8 @@ def story_tracking_options(): return response if __name__ == '__main__': - port = int(sys.argv[1]) if len(sys.argv) > 1 else Config.API_PORT + # Read the port from the environment (Cloud Run sets the PORT variable) + port = int(os.environ.get("PORT", Config.API_PORT)) + # Listen on 0.0.0.0 so the service is reachable externally print(f"[DEBUG] [api_gateway] [main] Starting on {Config.API_HOST}:{port} with debug={True}") - app.run(host=Config.API_HOST, port=port, debug=True) + app.run(host="0.0.0.0", port=port, debug=True) diff --git a/backend/microservices/__pycache__/summarization_service.cpython-312.pyc b/backend/microservices/__pycache__/summarization_service.cpython-312.pyc index 5203afd1b25f7da9c48aae098400c724886c1f4d..8c95596403ccf7b3a43ab0888cb9919592b27394 100644 GIT binary patch delta 20 acmdn%y4RKaG%qg~0}xEoJGPN~lM( Date: Wed, 5 Mar 2025 23:10:43 -0500 Subject: [PATCH 07/12] Added cloudbuild.yaml --- cloudbuild.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 cloudbuild.yaml diff --git a/cloudbuild.yaml b/cloudbuild.yaml new file mode 100644 index 0000000..adfca83 --- /dev/null +++ b/cloudbuild.yaml @@ -0,0 +1,8 @@ +steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', 'gcr.io/$PROJECT_ID/news-aggregator:$COMMIT_SHA', '.'] +images: + - 'gcr.io/$PROJECT_ID/news-aggregator:$COMMIT_SHA' +options: + logging: CLOUD_LOGGING_ONLY # or NONE, if you prefer no logs +serviceAccount: "projects/$PROJECT_ID/serviceAccounts/NAME@PROJECT_ID.iam.gserviceaccount.com" \ No newline at end of file From 761b2dc80c3b66abd5d3e29450e05c791443aab3 Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Thu, 6 Mar 2025 00:11:19 -0500 Subject: [PATCH 08/12] Changed Port --- backend/api_gateway/api_gateway.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/api_gateway/api_gateway.py b/backend/api_gateway/api_gateway.py index d1a7ff5..f05ed18 100644 --- a/backend/api_gateway/api_gateway.py +++ b/backend/api_gateway/api_gateway.py @@ -830,7 +830,7 @@ def story_tracking_options(): if __name__ == '__main__': # Read the port from the environment (Cloud Run sets the PORT variable) - port = int(os.environ.get("PORT", Config.API_PORT)) + port = int(os.environ.get("PORT", 8080)) # Listen on 0.0.0.0 so the service is reachable externally print(f"[DEBUG] [api_gateway] [main] Starting on {Config.API_HOST}:{port} with debug={True}") app.run(host="0.0.0.0", port=port, debug=True) From 7d6fa021609b5eced896921933c699aab7fbffa9 Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Thu, 6 Mar 2025 00:18:10 -0500 Subject: [PATCH 09/12] Updated cloudbuild --- cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index adfca83..c04853d 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -5,4 +5,4 @@ images: - 'gcr.io/$PROJECT_ID/news-aggregator:$COMMIT_SHA' options: logging: CLOUD_LOGGING_ONLY # or NONE, if you prefer no logs -serviceAccount: "projects/$PROJECT_ID/serviceAccounts/NAME@PROJECT_ID.iam.gserviceaccount.com" \ No newline at end of file +serviceAccount: "projects/$PROJECT_ID/serviceAccounts/99775608725-compute@developer.gserviceaccount.com" \ No newline at end of file From aba7fea2e1551b5c986a626abfb848cf931bf371 Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Thu, 6 Mar 2025 10:40:24 -0500 Subject: [PATCH 10/12] modified: Dockerfile --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1171abd..007d4ed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,8 +15,8 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . # Expose port 5000 (Cloud Run sets the PORT env variable) -EXPOSE 5000 +EXPOSE 8080 # Run the API Gateway. # Cloud Run will set PORT, so we use that environment variable. -CMD ["sh", "-c", "python backend/api_gateway/api_gateway.py ${PORT:-5000}"] \ No newline at end of file +CMD ["sh", "-c", "python backend/api_gateway/api_gateway.py ${PORT:-8080}"] \ No newline at end of file From 0d1b379e0d28689992acd8f991fe104ca796b61a Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Thu, 6 Mar 2025 10:59:01 -0500 Subject: [PATCH 11/12] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 8dc78c5..f24af4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,8 @@ gunicorn==21.2.0 pydantic tenacity loguru +PyJWT==2.8.0 +flask-jwt-extended==4.5.3 # Caching & Storage redis From 107d7171fba98640e5a41b7ba6cea0e88c7f4310 Mon Sep 17 00:00:00 2001 From: akalpit23 Date: Thu, 6 Mar 2025 14:25:25 -0500 Subject: [PATCH 12/12] changes --- backend/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/core/config.py b/backend/core/config.py index 3116038..08b791c 100644 --- a/backend/core/config.py +++ b/backend/core/config.py @@ -11,7 +11,7 @@ class Config: # API Configuration API_HOST = os.getenv('API_HOST', 'localhost') - API_PORT = int(os.getenv('API_PORT', 5001)) + API_PORT = int(os.getenv('API_PORT', 8080)) # Redis Configuration REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')