diff --git a/api-service/api/openapi.yaml b/api-service/api/openapi.yaml index c390957..93ebe0c 100644 --- a/api-service/api/openapi.yaml +++ b/api-service/api/openapi.yaml @@ -897,46 +897,10 @@ components: phone: type: string pattern: ^\+?[\d\s-]+$ - gender: - type: string - nullable: true - description: User gender identity (e.g., 'male', 'female', 'non-binary', 'prefer_not_to_say') - example: "female" - incomeBracket: - type: string - nullable: true - description: User income bracket category (e.g., '<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say') - example: "50k-100k" - country: - type: string - nullable: true - description: User country of residence (ISO 3166-1 alpha-2 code) - example: "US" - age: - type: integer - format: int32 - nullable: true - description: User age - example: 35 privacySettings: - type: object - properties: - dataSharingConsent: - type: boolean - default: false - anonymizeData: - type: boolean - default: false - optInStores: - type: array - items: - type: string - description: List of store IDs user has opted into - optOutStores: - type: array - items: - type: string - description: List of store IDs user has opted out from + $ref: "#/components/schemas/PrivacySettings" + demographicData: + $ref: "#/components/schemas/DemographicData" createdAt: type: string format: date-time @@ -946,6 +910,19 @@ components: format: date-time readOnly: true + PrivacySettings: + type: object + properties: + dataSharingConsent: + type: boolean + description: User consent to share aggregated/anonymized data. + allowInference: + type: boolean + description: Allow Tapiro to infer demographic data based on user activity. + default: true + required: + - dataSharingConsent + Store: type: object required: @@ -1006,6 +983,9 @@ components: dataSharingConsent: type: boolean description: User's consent for data sharing + allowInference: + type: boolean + description: Allow Tapiro to infer demographic data (defaults to true if omitted). 
gender: type: string nullable: true @@ -1057,45 +1037,75 @@ components: phone: type: string description: User's phone number (E.164 format recommended) + pattern: '^\+?[\d\s-]+$' preferences: type: array description: User interest preferences with taxonomy categorization items: $ref: "#/components/schemas/PreferenceItem" privacySettings: + $ref: "#/components/schemas/PrivacySettings" + demographicData: type: object + description: Updatable user-provided demographic information. Setting a value here implies verification and may clear inferred values. properties: - dataSharingConsent: - type: boolean - anonymizeData: + gender: + type: string + nullable: true + description: "User-provided gender identity" + enum: [male, female, non-binary, prefer_not_to_say, null] + incomeBracket: + type: string + nullable: true + description: "User-provided income bracket category" + enum: + [ + "<25k", + "25k-50k", + "50k-100k", + "100k-200k", + ">200k", + "prefer_not_to_say", + null, + ] + country: + type: string + nullable: true + description: "User-provided country of residence (e.g., ISO 3166-1 alpha-2 code)" + age: + type: integer + format: int32 + nullable: true + description: "User-provided age. Setting this clears the inferred age bracket." + minimum: 0 + hasKids: type: boolean - optInStores: - type: array - items: - type: string - description: List of store IDs the user has opted into sharing data with - optOutStores: - type: array - items: - type: string - description: List of store IDs the user has opted out of sharing data with - gender: - type: string - nullable: true - description: User gender identity - incomeBracket: - type: string - nullable: true - description: User income bracket category - country: - type: string - nullable: true - description: User country of residence (ISO 3166-1 alpha-2 code) - age: - type: integer - format: int32 - nullable: true - description: User age + nullable: true + description: "User-provided: Does the user have children?" 
+ relationshipStatus: + type: string + nullable: true + description: "User-provided: User relationship status" + enum: [single, relationship, married, prefer_not_to_say, null] + employmentStatus: + type: string + nullable: true + description: "User-provided: User employment status" + enum: [employed, unemployed, student, prefer_not_to_say, null] + educationLevel: + type: string + nullable: true + description: "User-provided: User education level" + enum: + [ + high_school, + bachelors, + masters, + doctorate, + prefer_not_to_say, + null, + ] + # REMOVED verification flags from update payload ApiKey: type: object @@ -1571,6 +1581,106 @@ components: version: type: string + DemographicData: + type: object + description: User-provided and inferred demographic information + properties: + # --- User-Provided --- + gender: + type: string + nullable: true + description: "User-provided gender identity" + enum: [male, female, non-binary, prefer_not_to_say, null] + example: "female" + incomeBracket: + type: string + nullable: true + description: "User-provided income bracket category" + enum: + [ + "<25k", + "25k-50k", + "50k-100k", + "100k-200k", + ">200k", + "prefer_not_to_say", + null, + ] + example: "50k-100k" + country: + type: string + nullable: true + description: "User-provided country of residence (e.g., ISO 3166-1 alpha-2 code)" + example: "US" + age: + type: integer + format: int32 + nullable: true + description: "User-provided age" + example: 35 + minimum: 0 + hasKids: # NEW User-provided + type: boolean + nullable: true + description: "User-provided: Does the user have children?" 
+ example: true + relationshipStatus: # NEW User-provided + type: string + nullable: true + description: "User-provided: User relationship status" + enum: [single, relationship, married, prefer_not_to_say, null] + example: "married" + employmentStatus: # NEW User-provided + type: string + nullable: true + description: "User-provided: User employment status" + enum: [employed, unemployed, student, prefer_not_to_say, null] + example: "employed" + educationLevel: # NEW User-provided + type: string + nullable: true + description: "User-provided: User education level" + enum: + [ + high_school, + bachelors, + masters, + doctorate, + prefer_not_to_say, + null, + ] + example: "bachelors" + # --- Inferred (Read-Only in responses, not updatable directly) --- + inferredHasKids: + type: boolean + nullable: true + readOnly: true + description: "Inferred: Does the user likely have children? (null if unknown or user provided)" + inferredRelationshipStatus: + type: string + nullable: true + readOnly: true + description: "Inferred: User relationship status (null if unknown or user provided)" + enum: [single, relationship, married, null] + inferredEmploymentStatus: + type: string + nullable: true + readOnly: true + description: "Inferred: User employment status (null if unknown or user provided)" + enum: [employed, unemployed, student, null] + inferredEducationLevel: + type: string + nullable: true + readOnly: true + description: "Inferred: User education level (null if unknown or user provided)" + enum: [high_school, bachelors, masters, doctorate, null] + inferredGender: + type: string + nullable: true + readOnly: true + description: "Inferred: User gender identity (null if unknown or user provided)" + enum: [male, female, non-binary, null] + RecentUserDataEntry: type: object properties: diff --git a/api-service/service/AuthenticationService.js b/api-service/service/AuthenticationService.js index e3daed0..9e9ebf8 100644 --- a/api-service/service/AuthenticationService.js +++ 
b/api-service/service/AuthenticationService.js @@ -13,10 +13,11 @@ const { CACHE_TTL, CACHE_KEYS } = require('../utils/cacheConfig'); exports.registerUser = async function (req, body) { try { const db = getDB(); - // Destructure new demographic fields + // Destructure new demographic fields AND allowInference const { preferences, dataSharingConsent, + allowInference, // <-- Add allowInference gender, incomeBracket, country, @@ -100,14 +101,35 @@ exports.registerUser = async function (req, body) { username: userData.username || userData.nickname || userData.sub, email: userData.email, phone: userData.phone_number || null, - gender: gender || null, // Add new fields, defaulting to null if not provided - incomeBracket: incomeBracket || null, - country: country || null, - age: age || null, + demographicData: { + // User provided (initialize as null unless provided in registration body) + gender: gender || null, + incomeBracket: incomeBracket || null, + country: country || null, + age: age || null, + hasKids: null, // NEW user-provided, init null + relationshipStatus: null, // NEW user-provided, init null + employmentStatus: null, // NEW user-provided, init null + educationLevel: null, // NEW user-provided, init null + // Inferred (initialize as null) + inferredHasKids: null, + // REMOVED hasKidsIsVerified + inferredRelationshipStatus: null, + // REMOVED relationshipStatusIsVerified + inferredEmploymentStatus: null, + // REMOVED employmentStatusIsVerified + inferredEducationLevel: null, + // REMOVED educationLevelIsVerified + // REMOVED inferredAgeBracket + // REMOVED ageBracketIsVerified + inferredGender: null, + // REMOVED genderIsVerified + }, preferences: preferences || [], privacySettings: { dataSharingConsent, anonymizeData: false, + allowInference: allowInference !== undefined ? 
allowInference : true, // <-- Set allowInference, default true optInStores: [], optOutStores: [], }, diff --git a/api-service/service/UserProfileService.js b/api-service/service/UserProfileService.js index 53ba8fa..940d3cb 100644 --- a/api-service/service/UserProfileService.js +++ b/api-service/service/UserProfileService.js @@ -110,31 +110,58 @@ exports.updateUserProfile = async function (req, body) { // --- Update Demographic Data --- // Use dot notation to set fields within the demographicData object - if (body.gender !== undefined) { - updateData['demographicData.gender'] = body.gender; + + // User-provided fields + if (body.demographicData?.gender !== undefined) { + updateData['demographicData.gender'] = body.demographicData.gender; + updateData['demographicData.inferredGender'] = null; // Clear inferred on user update demographicsChanged = true; } - if (body.incomeBracket !== undefined) { - updateData['demographicData.incomeBracket'] = body.incomeBracket; + if (body.demographicData?.incomeBracket !== undefined) { + updateData['demographicData.incomeBracket'] = body.demographicData.incomeBracket; demographicsChanged = true; } - if (body.country !== undefined) { - updateData['demographicData.country'] = body.country; + if (body.demographicData?.country !== undefined) { + updateData['demographicData.country'] = body.demographicData.country; demographicsChanged = true; } - if (body.age !== undefined) { - // Ensure age is null or an integer - const ageValue = body.age === null ? null : parseInt(body.age); - if (ageValue === null || !isNaN(ageValue)) { + if (body.demographicData?.age !== undefined) { + const ageValue = body.demographicData.age === null ? 
null : parseInt(body.demographicData.age); + if (ageValue === null || (!isNaN(ageValue) && ageValue >= 0)) { // Added age >= 0 check updateData['demographicData.age'] = ageValue; + // No inferred age bracket to clear anymore demographicsChanged = true; - // If age is being set, clear the inferred age bracket - updateData['demographicData.inferredAgeBracket'] = null; } else { - console.warn(`Invalid age value provided for user ${auth0UserId}: ${body.age}`); + console.warn(`Invalid age value provided for user ${auth0UserId}: ${body.demographicData.age}`); // Optionally return a 400 error here + // return respondWithCode(400, { code: 400, message: 'Invalid age provided.' }); } } + // --- NEW User-Provided Fields --- + if (body.demographicData?.hasKids !== undefined) { + updateData['demographicData.hasKids'] = body.demographicData.hasKids; + updateData['demographicData.inferredHasKids'] = null; // Clear inferred on user update + demographicsChanged = true; + } + if (body.demographicData?.relationshipStatus !== undefined) { + updateData['demographicData.relationshipStatus'] = body.demographicData.relationshipStatus; + updateData['demographicData.inferredRelationshipStatus'] = null; // Clear inferred on user update + demographicsChanged = true; + } + if (body.demographicData?.employmentStatus !== undefined) { + updateData['demographicData.employmentStatus'] = body.demographicData.employmentStatus; + updateData['demographicData.inferredEmploymentStatus'] = null; // Clear inferred on user update + demographicsChanged = true; + } + if (body.demographicData?.educationLevel !== undefined) { + updateData['demographicData.educationLevel'] = body.demographicData.educationLevel; + updateData['demographicData.inferredEducationLevel'] = null; // Clear inferred on user update + demographicsChanged = true; + } + + // --- REMOVED Verification Flag Handling --- + // The logic for hasKidsIsVerified, relationshipStatusIsVerified, etc. is removed. 
+ // --- End Update Demographic Data --- @@ -146,8 +173,8 @@ exports.updateUserProfile = async function (req, body) { updateData['privacySettings.dataSharingConsent'] = body.privacySettings.dataSharingConsent; privacySettingsChanged = true; } - if (body.privacySettings.anonymizeData !== undefined) { - updateData['privacySettings.anonymizeData'] = body.privacySettings.anonymizeData; + if (body.privacySettings.allowInference !== undefined) { // <-- Add check for allowInference + updateData['privacySettings.allowInference'] = body.privacySettings.allowInference; privacySettingsChanged = true; } // DO NOT update optInStores or optOutStores here @@ -190,7 +217,7 @@ exports.updateUserProfile = async function (req, body) { } // Invalidate store-specific preferences if demographics or relevant privacy settings changed - // Also invalidate if the optInStores list exists (safer to clear on any profile update) + // (Keep existing logic, as privacySettingsChanged flag now includes allowInference) const updatedUserDoc = result; // Use the returned document from findOneAndUpdate if ((demographicsChanged || privacySettingsChanged) && updatedUserDoc.privacySettings?.optInStores) { const userObjectId = updatedUserDoc._id; // Use the _id from the updated result diff --git a/api-service/utils/dbSchemas.js b/api-service/utils/dbSchemas.js index 108d7a2..b000021 100644 --- a/api-service/utils/dbSchemas.js +++ b/api-service/utils/dbSchemas.js @@ -3,7 +3,7 @@ */ // Schema version tracking -const SCHEMA_VERSION = '2.0.8'; // Incremented version +const SCHEMA_VERSION = '3.0.0'; // Incremented version const userSchema = { validator: { @@ -37,50 +37,78 @@ const userSchema = { bsonType: 'object', description: 'User-provided and inferred demographic information', properties: { + // --- User-Provided --- gender: { bsonType: ['string', 'null'], - description: 'User gender identity', + description: 'User-provided gender identity', // Clarified description enum: ['male', 'female', 'non-binary', 
'prefer_not_to_say', null] }, incomeBracket: { bsonType: ['string', 'null'], - description: 'User income bracket category', + description: 'User-provided income bracket category', // Clarified description enum: ['<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say', null] }, country: { bsonType: ['string', 'null'], - description: 'User country of residence (e.g., ISO 3166-1 alpha-2 code)', + description: 'User-provided country of residence (e.g., ISO 3166-1 alpha-2 code)', // Clarified description }, age: { bsonType: ['int', 'null'], - description: 'User age', + description: 'User-provided age', // Clarified description minimum: 0, }, - // --- Inferred fields within demographicData --- + // --- NEW User-Provided fields (mirroring inferred ones) --- + hasKids: { + bsonType: ['bool', 'null'], + description: 'User-provided: Does the user have children?', + }, + relationshipStatus: { + bsonType: ['string', 'null'], + description: 'User-provided: User relationship status', + enum: ['single', 'relationship', 'married', 'prefer_not_to_say', null], // Added prefer_not_to_say + }, + employmentStatus: { + bsonType: ['string', 'null'], + description: 'User-provided: User employment status', + enum: ['employed', 'unemployed', 'student', 'prefer_not_to_say', null], // Added prefer_not_to_say + }, + educationLevel: { + bsonType: ['string', 'null'], + description: 'User-provided: User education level', + enum: ['high_school', 'bachelors', 'masters', 'doctorate', 'prefer_not_to_say', null], // Added prefer_not_to_say + }, + // --- Inferred fields (kept separate, no verification flags) --- inferredHasKids: { bsonType: ['bool', 'null'], - description: 'Inferred: Does the user likely have children? (null if unknown)', + description: 'Inferred: Does the user likely have children? 
(null if unknown or user provided)', }, + // REMOVED hasKidsIsVerified inferredRelationshipStatus: { bsonType: ['string', 'null'], - description: 'Inferred: User relationship status (null if unknown)', - enum: ['single', 'relationship', 'married', null], + description: 'Inferred: User relationship status (null if unknown or user provided)', + enum: ['single', 'relationship', 'married', null], // Inferred won't be 'prefer_not_to_say' }, + // REMOVED relationshipStatusIsVerified inferredEmploymentStatus: { bsonType: ['string', 'null'], - description: 'Inferred: User employment status (null if unknown)', + description: 'Inferred: User employment status (null if unknown or user provided)', enum: ['employed', 'unemployed', 'student', null], }, + // REMOVED employmentStatusIsVerified inferredEducationLevel: { bsonType: ['string', 'null'], - description: 'Inferred: User education level (null if unknown)', + description: 'Inferred: User education level (null if unknown or user provided)', enum: ['high_school', 'bachelors', 'masters', 'doctorate', null], }, - inferredAgeBracket: { + // REMOVED educationLevelIsVerified + // REMOVED inferredAgeBracket + // REMOVED ageBracketIsVerified + inferredGender: { // Kept inferred gender bsonType: ['string', 'null'], - description: 'Inferred: User age bracket if age not provided (null if unknown)', - enum: ['18-24', '25-34', '35-44', '45-54', '55-64', '65+', null], + description: 'Inferred: User gender identity (null if unknown or user provided)', + enum: ['male', 'female', 'non-binary', null], }, + // REMOVED genderIsVerified } }, // --- End: Demographic Data Object --- @@ -118,9 +146,12 @@ const userSchema = { required: ['dataSharingConsent'], properties: { dataSharingConsent: { bsonType: 'bool' }, - anonymizeData: { bsonType: 'bool' }, - optInStores: { bsonType: 'array', items: { bsonType: 'string' } }, // Specify item type - optOutStores: { bsonType: 'array', items: { bsonType: 'string' } }, // Specify item type + allowInference: { 
// <-- Add new field + bsonType: 'bool', + description: 'Allow Tapiro to infer demographic data based on user activity (default: true)', + }, + optInStores: { bsonType: 'array', items: { bsonType: 'string' } }, + optOutStores: { bsonType: 'array', items: { bsonType: 'string' } }, }, }, createdAt: { bsonType: 'date' }, diff --git a/ml-service/app/services/demographicInference.py b/ml-service/app/services/demographicInference.py index cf80305..8bed9eb 100644 --- a/ml-service/app/services/demographicInference.py +++ b/ml-service/app/services/demographicInference.py @@ -2,229 +2,330 @@ from collections import defaultdict from typing import List, Dict, Any, Optional, Tuple from bson import ObjectId -from datetime import datetime # Import datetime -from app.utils.redis_util import invalidate_cache, CACHE_KEYS # Import cache utilities +from datetime import datetime +from app.utils.redis_util import invalidate_cache, CACHE_KEYS +from sentence_transformers import util +import numpy as np +from app.services.taxonomyService import get_taxonomy_service logger = logging.getLogger(__name__) -# --- Keyword Definitions (Examples - Expand significantly) --- -KIDS_KEYWORDS = { - "baby", "toddler", "child", "kid", "infant", "diaper", "stroller", - "crib", "formula", "nursery", "maternity", "school supplies", "toy", - "lego", "barbie", "playstation", "nintendo", # Be careful with broad terms -} -RELATIONSHIP_KEYWORDS = { - "wedding", "engagement", "anniversary", "couple", "partner", "spouse", - "boyfriend", "girlfriend", "husband", "wife", "romantic", "valentine", -} -MARRIED_KEYWORDS = { - "wedding", "anniversary", "spouse", "husband", "wife", "married", -} -SINGLE_KEYWORDS = { - "single", "dating app", "matchmaking", +# --- Semantic Configuration --- +SEMANTIC_SIMILARITY_THRESHOLD = 0.60 +EVIDENCE_THRESHOLD_DEFAULT = 2.0 # Use float for weighted evidence +EVIDENCE_THRESHOLD_MARRIED = 1.5 # Lower threshold for strong marriage signals +EVIDENCE_THRESHOLD_EDUCATION = 1.5 # Lower 
threshold for specific education terms + +# --- NEW: Rule-Based Keyword Configuration --- +# Keywords should be lowercase +KEYWORD_RULES = { + "has_kids": { + True: {"baby", "infant", "maternity", "diaper", "stroller", "crib", "newborn", "toddler", "child's toy"} + }, + "relationship_status": { + "married": {"wedding", "anniversary", "spouse", "husband", "wife"}, + "relationship": {"boyfriend", "girlfriend", "dating", "partner gift", "couples"} + }, + "employment_status": { + "student": {"student loan", "internship", "university", "college", "textbook", "dorm"}, + "unemployed": {"resume help", "job search"} + }, + "education_level": { + "doctorate": {"phd", "dissertation", "postdoc"}, + "masters": {"master's degree", "thesis"}, + "bachelors": {"bachelor's degree", "undergrad"}, + }, + "gender": { # Use with extreme caution + "male": {"men's", "for him", "grooming kit men"}, + "female": {"women's", "for her", "makeup set", "feminine hygiene"} + } } -# --- NEW Keyword Sets --- -EMPLOYMENT_KEYWORDS = { - "job search", "linkedin", "resume", "interview suit", "office supplies", - "business travel", "conference", "work laptop", "unemployment benefits", - "career fair", "networking event", -} -STUDENT_KEYWORDS = { - "student discount", "university", "college", "textbook", "dorm room", - "student loan", "internship", "campus", "study guide", "backpack", - "school supplies", # Overlap with KIDS_KEYWORDS, context matters -} -EDUCATION_KEYWORDS = { - "university", "college", "bachelor's degree", "master's degree", "phd", - "doctorate", "thesis", "dissertation", "academic journal", "textbook", - "research paper", "graduate school", -} -# Age bracket keywords are very unreliable, use with extreme caution or alternative methods -AGE_BRACKET_YOUNG_ADULT_KEYWORDS = { # Approx 18-24 - "college", "university", "first apartment", "internship", "study abroad", - "spring break", "starter job", -} -AGE_BRACKET_MID_CAREER_KEYWORDS = { # Approx 35-54 - "mortgage", "kids' college fund", 
"management training", "midlife crisis", # Joking, but maybe? - "retirement planning", "executive", -} -AGE_BRACKET_SENIOR_KEYWORDS = { # Approx 65+ - "retirement", "pension", "senior discount", "medicare", "grandchild", - "assisted living", "downsizing home", +# --- NEW: Evidence Weights --- +RULE_MATCH_WEIGHT = 1.5 +SEMANTIC_MATCH_WEIGHT = 1.0 + +# --- Semantic Target Descriptions --- +SEMANTIC_TARGETS = { + "has_kids": { + True: [ + "items for babies or infants", + "children's toys and games", + "parenting supplies", + "school-related items for kids", + "maternity wear or products", + "family activities or vacations", + ], + }, + "relationship_status": { + "married": [ + "wedding gifts or planning items", + "anniversary presents", + "items for spouse or partner", + "joint home purchases", + "husband or wife related items", + ], + "relationship": [ + "gifts for partner or significant other", + "couples items or activities", + "romantic presents", + "dating related items", + "items for boyfriend or girlfriend", + ], + "single": [ + "items for one person", + "dating app subscriptions", + "solo travel or activities", + "self-care items focused on independence", + ] + }, + "employment_status": { + "employed": [ + "professional work attire", + "office supplies or equipment", + "business travel items", + "commute-related products", + "career development materials", + ], + "student": [ + "textbooks or course materials", + "university or college supplies", + "dorm room furnishings", + "student discounts or events", + "internship-related items", + "study aids", + ], + "unemployed": [ + "job searching resources", + "resume building services", + ] + }, + "education_level": { + "doctorate": [ + "phd program materials", + "dissertation research tools", + "academic conference registration", + "postdoctoral research supplies", + ], + "masters": [ + "master's degree program materials", + "graduate school textbooks", + "thesis writing resources", + ], + "bachelors": [ + 
"bachelor's degree program materials", + "undergraduate textbooks", + "college supplies", + "university merchandise", + ], + "high_school": [ + "high school supplies", + ] + }, + "gender": { # Also potentially unreliable/sensitive + "male": [ + "men's clothing and accessories", + "grooming products typically for men", + "hobbies stereotypically associated with men", + "gifts for him", + ], + "female": [ + "women's clothing and accessories", + "makeup and cosmetics", + "skincare products typically for women", + "hobbies stereotypically associated with women", + "gifts for her", + "feminine hygiene products", + ], + "non-binary": [ + "gender-neutral clothing", + "unisex products", + ] + } } -# --- End NEW Keyword Sets --- +# --- End Semantic Target Descriptions --- -# --- Helper Function to Extract Text --- -def _extract_text_from_entries(entries: List[Dict[str, Any]]) -> List[str]: - """Extracts relevant text (item names, search queries) from entries.""" +# --- Helper Function to Extract Text (Keep as is) --- +def _extract_text_from_user_data_docs(user_data_docs: List[Dict[str, Any]]) -> List[str]: + """Extracts relevant text (item names, search queries) from a list of userData documents.""" texts = [] - for entry in entries: - if entry.get("dataType") == "purchase": - texts.extend([item.get("name", "").lower() for item in entry.get("items", [])]) - elif entry.get("dataType") == "search": - texts.append(entry.get("query", "").lower()) + for doc in user_data_docs: + # Each doc has an 'entries' list + for entry in doc.get("entries", []): + # Process purchase items within each entry + if doc.get("dataType") == "purchase": # Check dataType at the document level + for item in entry.get("items", []): + if item.get("name"): + texts.append(item["name"].lower()) + # Process search query within each entry + elif doc.get("dataType") == "search": # Check dataType at the document level + if entry.get("query"): + texts.append(entry["query"].lower()) return [text for text in texts 
if text] # Filter out empty strings -# --- Inference Functions --- -async def infer_has_kids(entries: List[Dict[str, Any]]) -> Optional[bool]: - """Infer if user has kids based on purchase/search keywords.""" - kid_evidence_count = 0 - texts = _extract_text_from_entries(entries) - logger.debug(f"Inferring 'has_kids' from {len(texts)} text entries.") - for text in texts: - if any(keyword in text for keyword in KIDS_KEYWORDS): - kid_evidence_count += 1 - logger.debug(f"Kid keyword found: {text}") - - if kid_evidence_count >= 2: # Require multiple pieces of evidence - logger.debug(f"Inferring 'has_kids' = True (evidence count: {kid_evidence_count})") - return True - logger.debug(f"Inferring 'has_kids' = None (evidence count: {kid_evidence_count})") - return None # Not enough evidence - -async def infer_relationship_status(entries: List[Dict[str, Any]]) -> Optional[str]: - """Infer relationship status (single, relationship, married) based on keywords.""" - married_evidence = 0 - relationship_evidence = 0 - single_evidence = 0 # Less reliable - texts = _extract_text_from_entries(entries) - logger.debug(f"Inferring 'relationship_status' from {len(texts)} text entries.") +# --- Hybrid Inference Helper (Modified) --- +async def _run_hybrid_inference_for_attribute( # Renamed for clarity + attribute_name: str, + user_data_docs: List[Dict[str, Any]], + taxonomy_service # Pass the service instance +) -> Optional[Any]: + """ + Generic function to infer a demographic attribute using HYBRID (rule + semantic) approach. 
- for text in texts: - # Check married first for priority - if any(keyword in text for keyword in MARRIED_KEYWORDS): - married_evidence += 1 - logger.debug(f"Married keyword found: {text}") - elif any(keyword in text for keyword in RELATIONSHIP_KEYWORDS): - relationship_evidence += 1 - logger.debug(f"Relationship keyword found: {text}") - elif any(keyword in text for keyword in SINGLE_KEYWORDS): - single_evidence += 1 - logger.debug(f"Single keyword found: {text}") - - # Prioritize married > relationship > single based on evidence threshold - if married_evidence >= 1: # Lower threshold for specific events like wedding - logger.debug(f"Inferring 'relationship_status' = 'married' (evidence count: {married_evidence})") - return "married" - elif relationship_evidence >= 2: - logger.debug(f"Inferring 'relationship_status' = 'relationship' (evidence count: {relationship_evidence})") - return "relationship" - # elif single_evidence >= 1: # Be very cautious enabling this - # logger.debug(f"Inferring 'relationship_status' = 'single' (evidence count: {single_evidence})") - # return "single" - logger.debug("Inferring 'relationship_status' = None (insufficient evidence)") - return None # Not enough evidence - -# --- NEW Inference Functions --- - -async def infer_employment_status(entries: List[Dict[str, Any]]) -> Optional[str]: - """Infer employment status (employed, student, unemployed) based on keywords.""" - student_evidence = 0 - employment_evidence = 0 - # Inferring 'unemployed' directly from keywords is very difficult/unreliable - texts = _extract_text_from_entries(entries) - logger.debug(f"Inferring 'employment_status' from {len(texts)} text entries.") + Args: + attribute_name: The key from SEMANTIC_TARGETS/KEYWORD_RULES. + user_data_docs: List of user data documents. + taxonomy_service: Initialized TaxonomyService instance. 
- for text in texts: - # Check student first due to potential overlap (e.g., "school supplies") - if any(keyword in text for keyword in STUDENT_KEYWORDS): - student_evidence += 1 - logger.debug(f"Student keyword found: {text}") - elif any(keyword in text for keyword in EMPLOYMENT_KEYWORDS): - employment_evidence += 1 - logger.debug(f"Employment keyword found: {text}") - - # Prioritize student if strong evidence, otherwise employed - if student_evidence >= 2: - logger.debug(f"Inferring 'employment_status' = 'student' (evidence count: {student_evidence})") - return "student" - elif employment_evidence >= 2: - logger.debug(f"Inferring 'employment_status' = 'employed' (evidence count: {employment_evidence})") - return "employed" - # Add more sophisticated logic? Check for conflicting terms? - logger.debug("Inferring 'employment_status' = None (insufficient evidence)") - return None # Not enough evidence - -async def infer_education_level(entries: List[Dict[str, Any]]) -> Optional[str]: - """Infer education level (high_school, bachelors, masters, doctorate) - Very Speculative.""" - doctorate_evidence = 0 - masters_evidence = 0 - bachelors_evidence = 0 - texts = _extract_text_from_entries(entries) - logger.debug(f"Inferring 'education_level' from {len(texts)} text entries.") + Returns: + The inferred value or None if insufficient evidence. 
+ """ + texts = _extract_text_from_user_data_docs(user_data_docs) + if not texts: + logger.debug(f"Hybrid Inference ({attribute_name}): No text entries found.") + return None + + logger.debug(f"Hybrid Inference ({attribute_name}): Processing {len(texts)} text entries.") + + # --- Prepare Semantic Targets (if needed) --- + semantic_target_map = SEMANTIC_TARGETS.get(attribute_name, {}) + target_embeddings_tensor = None + semantic_target_value_map = {} + all_semantic_target_texts = [] + can_do_semantic = False + + if semantic_target_map and taxonomy_service and taxonomy_service.embedding_model: + idx = 0 + for value, descriptions in semantic_target_map.items(): + for desc in descriptions: + all_semantic_target_texts.append(desc) + semantic_target_value_map[idx] = value + idx += 1 + + if all_semantic_target_texts: + try: + target_embeddings_tensor = taxonomy_service.embedding_model.encode(all_semantic_target_texts, convert_to_tensor=True) + can_do_semantic = True + logger.debug(f"Hybrid Inference ({attribute_name}): Prepared semantic targets.") + except Exception as e: + logger.error(f"Hybrid Inference ({attribute_name}): Failed to encode semantic target descriptions: {e}", exc_info=True) + else: + logger.debug(f"Hybrid Inference ({attribute_name}): No semantic target descriptions found.") + else: + logger.warning(f"Hybrid Inference ({attribute_name}): Semantic inference disabled (no targets or model unavailable).") + # --- End Prepare Semantic Targets --- - for text in texts: - # Check most specific first - if any(keyword in text for keyword in ["phd", "doctorate", "dissertation"]): - doctorate_evidence += 1 - logger.debug(f"Doctorate keyword found: {text}") - elif any(keyword in text for keyword in ["master's degree", "graduate school", "thesis"]): - masters_evidence += 1 - logger.debug(f"Masters keyword found: {text}") - elif any(keyword in text for keyword in ["bachelor's degree", "university", "college", "undergrad"]): - bachelors_evidence += 1 - 
logger.debug(f"Bachelors keyword found: {text}") - - # Prioritize highest level found with some evidence threshold - if doctorate_evidence >= 1: - logger.debug(f"Inferring 'education_level' = 'doctorate' (evidence count: {doctorate_evidence})") - return "doctorate" - elif masters_evidence >= 1: - logger.debug(f"Inferring 'education_level' = 'masters' (evidence count: {masters_evidence})") - return "masters" - elif bachelors_evidence >= 2: # Require slightly more for bachelors - logger.debug(f"Inferring 'education_level' = 'bachelors' (evidence count: {bachelors_evidence})") - return "bachelors" - # Inferring 'high_school' is difficult, maybe default if other evidence is weak? - logger.debug("Inferring 'education_level' = None (insufficient evidence)") - return None # Very uncertain - -async def infer_age_bracket(entries: List[Dict[str, Any]]) -> Optional[str]: - """Infer age bracket based on keywords - EXTREMELY SPECULATIVE AND UNRELIABLE.""" - young_adult_evidence = 0 - mid_career_evidence = 0 - senior_evidence = 0 - texts = _extract_text_from_entries(entries) - logger.debug(f"Inferring 'age_bracket' from {len(texts)} text entries.") + # --- Prepare Keyword Rules --- + keyword_rule_map = KEYWORD_RULES.get(attribute_name, {}) + can_do_rules = bool(keyword_rule_map) + logger.debug(f"Hybrid Inference ({attribute_name}): Keyword rules {'enabled' if can_do_rules else 'disabled'}.") + # --- End Prepare Keyword Rules --- - for text in texts: - if any(keyword in text for keyword in AGE_BRACKET_SENIOR_KEYWORDS): - senior_evidence += 1 - logger.debug(f"Senior age keyword found: {text}") - elif any(keyword in text for keyword in AGE_BRACKET_MID_CAREER_KEYWORDS): - mid_career_evidence += 1 - logger.debug(f"Mid-career age keyword found: {text}") - elif any(keyword in text for keyword in AGE_BRACKET_YOUNG_ADULT_KEYWORDS): - young_adult_evidence += 1 - logger.debug(f"Young adult age keyword found: {text}") - - # Simple thresholding - needs much refinement or a different approach 
- if senior_evidence >= 1: - logger.debug(f"Inferring 'age_bracket' = '65+' (evidence count: {senior_evidence})") - return "65+" - elif mid_career_evidence >= 2: - # Could try to differentiate 35-44 vs 45-54 based on keywords, but very hard - logger.debug(f"Inferring 'age_bracket' = '35-54' (evidence count: {mid_career_evidence})") - return "35-54" # Combine for now - elif young_adult_evidence >= 2: - logger.debug(f"Inferring 'age_bracket' = '18-24' (evidence count: {young_adult_evidence})") - return "18-24" - - logger.warning("Age bracket inference based on keywords is highly unreliable.") - logger.debug("Inferring 'age_bracket' = None (insufficient evidence)") - return None # Highly uncertain - -# --- Main Inference Runner --- + if not can_do_rules and not can_do_semantic: + logger.warning(f"Hybrid Inference ({attribute_name}): No rules or semantic targets available. Cannot infer.") + return None + # --- Calculate similarities and count evidence (Hybrid Logic) --- + evidence_counts = defaultdict(float) # Use float for weighted evidence + matched_texts_per_value = defaultdict(set) # Track unique texts per value + + for text in texts: + matched_by_rule = False + # 1. Check Keyword Rules First + if can_do_rules: + for value, keywords in keyword_rule_map.items(): + # Simple substring check for keywords + if any(keyword in text for keyword in keywords): + if text not in matched_texts_per_value[value]: + evidence_counts[value] += RULE_MATCH_WEIGHT + matched_texts_per_value[value].add(text) + logger.debug(f"Rule Match ({attribute_name}): '{text}' -> '{value}' (Weight: {RULE_MATCH_WEIGHT})") + matched_by_rule = True + break # Stop checking rules for this text once one matches + if matched_by_rule: + continue # Move to the next text if a rule matched + + # 2. 
If no rule matched, try Semantic Check + if can_do_semantic: + try: + text_embedding = taxonomy_service.embedding_model.encode(text, convert_to_tensor=True) + similarities = util.pytorch_cos_sim(text_embedding, target_embeddings_tensor)[0] + best_match_idx = similarities.argmax().item() + best_score = similarities[best_match_idx].item() + + if best_score >= SEMANTIC_SIMILARITY_THRESHOLD: + matched_value = semantic_target_value_map[best_match_idx] + if text not in matched_texts_per_value[matched_value]: + evidence_counts[matched_value] += SEMANTIC_MATCH_WEIGHT + matched_texts_per_value[matched_value].add(text) + logger.debug(f"Semantic Match ({attribute_name}): '{text}' -> '{matched_value}' (Score: {best_score:.4f}, Weight: {SEMANTIC_MATCH_WEIGHT}, Target: '{all_semantic_target_texts[best_match_idx]}')") + + except Exception as e: + logger.warning(f"Hybrid Inference ({attribute_name}): Error processing semantic check for text '{text}': {e}") + continue # Skip this text entry + # --- End Calculate similarities --- + + + # --- Determine inferred value based on evidence thresholds (Using Floats) --- + inferred_value = None + highest_evidence_score = 0.0 + + # Special handling for relationship status priority + if attribute_name == "relationship_status": + if evidence_counts.get("married", 0.0) >= EVIDENCE_THRESHOLD_MARRIED: + inferred_value = "married" + highest_evidence_score = evidence_counts["married"] + elif evidence_counts.get("relationship", 0.0) >= EVIDENCE_THRESHOLD_DEFAULT: + # Only infer 'relationship' if 'married' didn't meet its threshold + if inferred_value != "married": + inferred_value = "relationship" + highest_evidence_score = evidence_counts["relationship"] + # Add 'single' check if desired + # elif evidence_counts.get("single", 0.0) >= EVIDENCE_THRESHOLD_DEFAULT + 1.0: ... 
+ + # Special handling for education level priority + elif attribute_name == "education_level": + if evidence_counts.get("doctorate", 0.0) >= EVIDENCE_THRESHOLD_EDUCATION: + inferred_value = "doctorate" + highest_evidence_score = evidence_counts["doctorate"] + elif evidence_counts.get("masters", 0.0) >= EVIDENCE_THRESHOLD_EDUCATION: + if inferred_value != "doctorate": + inferred_value = "masters" + highest_evidence_score = evidence_counts["masters"] + elif evidence_counts.get("bachelors", 0.0) >= EVIDENCE_THRESHOLD_DEFAULT: + if inferred_value not in ["doctorate", "masters"]: + inferred_value = "bachelors" + highest_evidence_score = evidence_counts["bachelors"] + + else: + # Default handling: pick value with highest evidence score above threshold + threshold = EVIDENCE_THRESHOLD_DEFAULT + for value, score in evidence_counts.items(): + if score >= threshold and score > highest_evidence_score: + highest_evidence_score = score + inferred_value = value + + if inferred_value is not None: + logger.info(f"Hybrid Inference Result ({attribute_name}): Inferred '{inferred_value}' (Evidence Score: {highest_evidence_score:.2f})") + else: + # Log counts even if insufficient + log_counts = {k: round(v, 2) for k, v in evidence_counts.items()} + logger.info(f"Hybrid Inference Result ({attribute_name}): None (Insufficient evidence. Scores: {log_counts})") + + return inferred_value + # --- End Determine inferred value --- + + +# --- Main Inference Runner (Updated) --- async def run_inference_for_user(user_id: str, email: str, db, limit: int = 50) -> bool: """ - Runs demographic inference based on recent user data and updates the user document if changes are found. + Runs HYBRID demographic inference based on recent user data and updates + the user document if changes are found AND the user has not provided their own value. Returns True if the user document was updated, False otherwise. 
""" - logger.info(f"Running demographic inference for user {user_id} ({email})") + logger.info(f"Running HYBRID demographic inference for user {user_id} ({email})") updated = False try: user_object_id = ObjectId(user_id) @@ -233,7 +334,7 @@ async def run_inference_for_user(user_id: str, email: str, db, limit: int = 50) logger.error(f"Inference: User not found by ID {user_id}") return False - # Fetch recent userData entries for the user + # Fetch recent userData entries recent_data = await db.userData.find( {"userId": user_object_id} ).sort("timestamp", -1).limit(limit).to_list(length=limit) @@ -241,86 +342,100 @@ async def run_inference_for_user(user_id: str, email: str, db, limit: int = 50) if not recent_data: logger.info(f"Inference: No recent data found for user {user_id}") return False - else: - logger.info(f"Inference: Found {len(recent_data)} recent data entries for user {user_id}") + logger.info(f"Inference: Found {len(recent_data)} recent data entries for user {user_id}") + + # Get Taxonomy Service (needed for embeddings) + taxonomy_service = await get_taxonomy_service(db) - # --- Run inference functions --- - inferred_kids = await infer_has_kids(recent_data) - inferred_status = await infer_relationship_status(recent_data) - inferred_employment = await infer_employment_status(recent_data) - inferred_education = await infer_education_level(recent_data) # Very speculative - inferred_age_bracket = None - # Only infer age bracket if age is not already set in demographicData current_demographics = user.get("demographicData", {}) - if current_demographics.get("age") is None: - logger.info(f"Inference: User {email} has no age set, attempting age bracket inference.") - inferred_age_bracket = await infer_age_bracket(recent_data) # Highly speculative + + # --- Run hybrid inference functions (conditionally) --- + inferred_kids = None + if current_demographics.get("hasKids") is None: + inferred_kids = await _run_hybrid_inference_for_attribute("has_kids", recent_data, 
taxonomy_service) + else: + logger.info(f"Inference (has_kids): Skipped, user value exists ('{current_demographics.get('hasKids')}')") + + inferred_status = None + if current_demographics.get("relationshipStatus") is None: + inferred_status = await _run_hybrid_inference_for_attribute("relationship_status", recent_data, taxonomy_service) else: - logger.info(f"Inference: User {email} has age set ({current_demographics.get('age')}), skipping age bracket inference.") + logger.info(f"Inference (relationship_status): Skipped, user value exists ('{current_demographics.get('relationshipStatus')}')") + inferred_employment = None + if current_demographics.get("employmentStatus") is None: + inferred_employment = await _run_hybrid_inference_for_attribute("employment_status", recent_data, taxonomy_service) + else: + logger.info(f"Inference (employment_status): Skipped, user value exists ('{current_demographics.get('employmentStatus')}')") - # --- Prepare update payload --- + inferred_education = None + if current_demographics.get("educationLevel") is None: + inferred_education = await _run_hybrid_inference_for_attribute("education_level", recent_data, taxonomy_service) + else: + logger.info(f"Inference (education_level): Skipped, user value exists ('{current_demographics.get('educationLevel')}')") + + inferred_gender = None + if current_demographics.get("gender") is None: + inferred_gender = await _run_hybrid_inference_for_attribute("gender", recent_data, taxonomy_service) + else: + logger.info(f"Inference (gender): Skipped, user value exists ('{current_demographics.get('gender')}')") + + # --- Prepare update payload (Simplified check_and_set) --- update_payload = {} - # Read current values from the nested demographicData object - current_kids = current_demographics.get("inferredHasKids") - current_status = current_demographics.get("inferredRelationshipStatus") - current_employment = current_demographics.get("inferredEmploymentStatus") - current_education = 
current_demographics.get("inferredEducationLevel") - current_age_bracket = current_demographics.get("inferredAgeBracket") - - # Use dot notation for updates within the nested object - if inferred_kids is not None and inferred_kids != current_kids: - update_payload["demographicData.inferredHasKids"] = inferred_kids - logger.info(f"Inference update for {email}: demographicData.inferredHasKids -> {inferred_kids} (was {current_kids})") - if inferred_status is not None and inferred_status != current_status: - update_payload["demographicData.inferredRelationshipStatus"] = inferred_status - logger.info(f"Inference update for {email}: demographicData.inferredRelationshipStatus -> {inferred_status} (was {current_status})") - if inferred_employment is not None and inferred_employment != current_employment: - update_payload["demographicData.inferredEmploymentStatus"] = inferred_employment - logger.info(f"Inference update for {email}: demographicData.inferredEmploymentStatus -> {inferred_employment} (was {current_employment})") - if inferred_education is not None and inferred_education != current_education: - update_payload["demographicData.inferredEducationLevel"] = inferred_education - logger.info(f"Inference update for {email}: demographicData.inferredEducationLevel -> {inferred_education} (was {current_education})") - if inferred_age_bracket is not None and inferred_age_bracket != current_age_bracket: - update_payload["demographicData.inferredAgeBracket"] = inferred_age_bracket - logger.info(f"Inference update for {email}: demographicData.inferredAgeBracket -> {inferred_age_bracket} (was {current_age_bracket})") + now = datetime.now() + + # Simplified: Only updates the inferred field if the new inference differs from the current inferred value + def check_and_set(field_name: str, inferred_value: Any): + # Note: field_name here is the *inferred* field name (e.g., "inferredHasKids") + current_inferred_value = current_demographics.get(field_name) + + if inferred_value is not 
None and inferred_value != current_inferred_value: + # Use dot notation for nested update + db_field_name = f"demographicData.{field_name}" + update_payload[db_field_name] = inferred_value + logger.info(f"Inference update for {email}: {db_field_name} -> {inferred_value} (was {current_inferred_value})") + # No need to log skipping based on verification anymore + + # Map inferred values to their DB field names + check_and_set("inferredHasKids", inferred_kids) + check_and_set("inferredRelationshipStatus", inferred_status) + check_and_set("inferredEmploymentStatus", inferred_employment) + check_and_set("inferredEducationLevel", inferred_education) + check_and_set("inferredGender", inferred_gender) # --- End Prepare update payload --- - # Update user document in DB if there are changes + # --- Update user document in DB if there are changes --- if update_payload: - logger.info(f"Inference: Found updates for {email}: {update_payload.keys()}") - update_payload["updatedAt"] = datetime.now() # Update timestamp + update_payload["updatedAt"] = now # Update timestamp result = await db.users.update_one( {"_id": user_object_id}, {"$set": update_payload} ) if result.modified_count > 0: updated = True - logger.info(f"Inference: Successfully updated user document for {email}") - - # --- Invalidate Caches on Successful Update --- + logger.info(f"Inference: Successfully updated inferred demographic data for user {user_id}") + # --- Invalidate Caches --- auth0_id = user.get("auth0Id") if auth0_id: - # Invalidate user data and general preferences await invalidate_cache(f"{CACHE_KEYS['USER_DATA']}{auth0_id}") - await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") - logger.info(f"Inference: Invalidated USER_DATA and PREFERENCES cache for {auth0_id}") - - # Invalidate store-specific preferences for opt-in stores + await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") # Invalidate prefs as demographics changed + # Invalidate store-specific caches if opt-in stores 
exist if user.get("privacySettings", {}).get("optInStores"): - user_object_id_str = str(user_object_id) for store_id in user["privacySettings"]["optInStores"]: - await invalidate_cache(f"{CACHE_KEYS['STORE_PREFERENCES']}{user_object_id_str}:{store_id}") - logger.info(f"Inference: Invalidated STORE_PREFERENCES caches for {auth0_id}") + # Use user_id (ObjectId string) for store cache key consistency + await invalidate_cache(f"{CACHE_KEYS['STORE_PREFERENCES']}{user_id}:{store_id}") + logger.info(f"Inference: Invalidated relevant caches for user {auth0_id}") # --- End Cache Invalidation --- else: - logger.warning(f"Inference: Update payload generated but DB modify count was 0 for {email}. Payload: {update_payload}") + logger.warning(f"Inference: Update attempted for {user_id} but no documents were modified.") else: - logger.info(f"Inference: No demographic updates found for {email}") - + logger.info(f"Inference: No inferred demographic updates needed for {user_id}") except Exception as e: logger.error(f"Error during demographic inference for user {user_id}: {str(e)}", exc_info=True) + # Do not return True here, as the update didn't necessarily succeed + updated = False # Ensure updated is False on error return updated + # --- End Main Inference Runner --- diff --git a/ml-service/app/services/preferenceProcessor.py b/ml-service/app/services/preferenceProcessor.py index 2300526..8a022b2 100644 --- a/ml-service/app/services/preferenceProcessor.py +++ b/ml-service/app/services/preferenceProcessor.py @@ -47,6 +47,10 @@ async def process_user_data(data: UserDataEntry, db) -> UserPreferences: user_id = str(user["_id"]) # Use the confirmed user ID from DB logger.info(f"Found user {email} with DB ID {user_id}") + # --- Check Inference Permission --- + privacy_settings = user.get("privacySettings", {}) + allow_inference = privacy_settings.get("allowInference", True) # Default to True if missing + # Extract demographics from the nested 'demographicData' field 
user_demographics_nested = user.get("demographicData", {}) # Flatten the dictionary to pass to processing functions @@ -155,43 +159,43 @@ async def process_user_data(data: UserDataEntry, db) -> UserPreferences: except Exception as e: logger.error(f"Failed to update userData status for {email}: {str(e)}") - # --- Run Demographic Inference (After main processing) --- + # --- Run Demographic Inference (Conditionally) --- inference_updated_user = False - try: - logger.info(f"Starting demographic inference for user {email} ({user_id})") - inference_updated_user = await run_inference_for_user(user_id, email, db) - if inference_updated_user: - logger.info(f"Demographic inference updated user document for {email}") - # Cache invalidation is handled within run_inference_for_user - else: - logger.info(f"Demographic inference did not result in updates for user {email}") - except Exception as inference_error: - logger.error(f"Demographic inference failed for user {email}: {inference_error}", exc_info=True) + if allow_inference: # <-- Check the flag + try: + logger.info(f"Starting demographic inference for user {email} ({user_id}) as allowInference is True.") + inference_updated_user = await run_inference_for_user(user_id, email, db) + if inference_updated_user: + logger.info(f"Demographic inference updated user document for {email}") + # Cache invalidation is handled within run_inference_for_user + else: + logger.info(f"Demographic inference did not result in updates for user {email}") + except Exception as inference_error: + logger.error(f"Demographic inference failed for user {email}: {inference_error}", exc_info=True) + else: + logger.info(f"Skipping demographic inference for user {email} ({user_id}) as allowInference is False.") # --- End Demographic Inference --- - # Invalidate user preferences cache using auth0Id (if not already done by inference) - # This ensures caches are cleared even if inference didn't run or update + # Invalidate relevant caches unconditionally 
after processing and inference attempt auth0_id = user.get("auth0Id") if auth0_id: - # Check if inference already invalidated caches for this user - if not inference_updated_user: - logger.info(f"Running post-processing cache invalidation for {auth0_id} as inference didn't update.") - await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") - logger.info(f"Invalidated PREFERENCES cache for user {auth0_id} (post-processing)") - - # Invalidate store-specific caches if opt-in stores exist - if user.get("privacySettings", {}).get("optInStores"): - for store_id in user["privacySettings"]["optInStores"]: - store_pref_key = f"{CACHE_KEYS['STORE_PREFERENCES']}{user_id}:{store_id}" - await invalidate_cache(store_pref_key) - logger.info(f"Invalidated STORE_PREFERENCES caches for user {auth0_id} (post-processing)") - else: - logger.info(f"Skipping post-processing cache invalidation as inference already handled it for {auth0_id}") + logger.info(f"Running post-processing cache invalidation for {auth0_id}.") + # Invalidate general user data and preferences + await invalidate_cache(f"{CACHE_KEYS['USER_DATA']}{auth0_id}") + await invalidate_cache(f"{CACHE_KEYS['PREFERENCES']}{auth0_id}") + logger.info(f"Invalidated USER_DATA and PREFERENCES caches for user {auth0_id} (post-processing)") + + # Invalidate store-specific caches if opt-in stores exist + if user.get("privacySettings", {}).get("optInStores"): + user_object_id_str = str(user["_id"]) # Use user_id from the fetched user object + for store_id in user["privacySettings"]["optInStores"]: + store_pref_key = f"{CACHE_KEYS['STORE_PREFERENCES']}{user_object_id_str}:{store_id}" + await invalidate_cache(store_pref_key) + logger.info(f"Invalidated STORE_PREFERENCES caches for user {auth0_id} (post-processing)") else: logger.warning(f"Cannot invalidate caches for user {email} as auth0Id is missing.") - # Return updated preferences in the expected format return UserPreferences( user_id=user_id, diff --git 
a/web/src/api/types/data-contracts.ts b/web/src/api/types/data-contracts.ts index 3f263ea..9c68958 100644 --- a/web/src/api/types/data-contracts.ts +++ b/web/src/api/types/data-contracts.ts @@ -28,43 +28,29 @@ export interface User { username?: string; /** @pattern ^\+?[\d\s-]+$ */ phone?: string; - /** - * User gender identity (e.g., 'male', 'female', 'non-binary', 'prefer_not_to_say') - * @example "female" - */ - gender?: string | null; - /** - * User income bracket category (e.g., '<25k', '25k-50k', '50k-100k', '100k-200k', '>200k', 'prefer_not_to_say') - * @example "50k-100k" - */ - incomeBracket?: string | null; - /** - * User country of residence (ISO 3166-1 alpha-2 code) - * @example "US" - */ - country?: string | null; - /** - * User age - * @format int32 - * @example 35 - */ - age?: number | null; - privacySettings: { - /** @default false */ - dataSharingConsent?: boolean; - /** @default false */ - anonymizeData?: boolean; - /** List of store IDs user has opted into */ - optInStores?: string[]; - /** List of store IDs user has opted out from */ - optOutStores?: string[]; - }; + privacySettings: PrivacySettings; + /** User-provided and inferred demographic information */ + demographicData?: DemographicData; /** @format date-time */ createdAt?: string; /** @format date-time */ updatedAt?: string; } +export interface PrivacySettings { + /** User consent to share aggregated/anonymized data. */ + dataSharingConsent: boolean; + /** + * Allow Tapiro to infer demographic data based on user activity. + * @default true + */ + allowInference?: boolean; + /** List of store IDs the user explicitly allows data sharing with. */ + optInStores?: string[]; + /** List of store IDs the user explicitly blocks data sharing with. 
*/ + optOutStores?: string[]; +} + export interface Store { storeId?: string; /** Auth0 organization ID */ @@ -88,6 +74,8 @@ export interface UserCreate { preferences?: PreferenceItem[]; /** User's consent for data sharing */ dataSharingConsent: boolean; + /** Allow Tapiro to infer demographic data (defaults to true if omitted). */ + allowInference?: boolean; /** User gender identity */ gender?: string | null; /** User income bracket category */ @@ -114,27 +102,60 @@ export interface StoreCreate { export interface UserUpdate { /** User's unique username */ username?: string; - /** User's phone number (E.164 format recommended) */ + /** + * User's phone number (E.164 format recommended) + * @pattern ^\+?[\d\s-]+$ + */ phone?: string; /** User interest preferences with taxonomy categorization */ preferences?: PreferenceItem[]; - privacySettings?: { - dataSharingConsent?: boolean; - anonymizeData?: boolean; - optInStores?: string[]; - optOutStores?: string[]; + privacySettings?: PrivacySettings; + /** Updatable user-provided demographic information. Setting a value here implies verification and may clear inferred values. */ + demographicData?: { + /** User-provided gender identity */ + gender?: "male" | "female" | "non-binary" | "prefer_not_to_say" | null; + /** User-provided income bracket category */ + incomeBracket?: + | "<25k" + | "25k-50k" + | "50k-100k" + | "100k-200k" + | ">200k" + | "prefer_not_to_say" + | null; + /** User-provided country of residence (e.g., ISO 3166-1 alpha-2 code) */ + country?: string | null; + /** + * User-provided age. Setting this clears the inferred age bracket. + * @format int32 + * @min 0 + */ + age?: number | null; + /** User-provided: Does the user have children? 
*/ + hasKids?: boolean | null; + /** User-provided: User relationship status */ + relationshipStatus?: + | "single" + | "relationship" + | "married" + | "prefer_not_to_say" + | null; + /** User-provided: User employment status */ + employmentStatus?: + | "employed" + | "unemployed" + | "student" + | "prefer_not_to_say" + | null; + /** User-provided: User education level */ + educationLevel?: + | "high_school" + | "bachelors" + | "masters" + | "doctorate" + | "prefer_not_to_say" + | null; }; - /** User gender identity */ - gender?: string | null; - /** User income bracket category */ - incomeBracket?: string | null; - /** User country of residence (ISO 3166-1 alpha-2 code) */ - country?: string | null; - /** - * User age - * @format int32 - */ - age?: number | null; } export interface ApiKey { @@ -402,6 +423,90 @@ export interface Taxonomy { version: string; } +/** User-provided and inferred demographic information */ +export interface DemographicData { + /** + * User-provided gender identity + * @example "female" + */ + gender?: "male" | "female" | "non-binary" | "prefer_not_to_say" | null; + /** + * User-provided income bracket category + * @example "50k-100k" + */ + incomeBracket?: + | "<25k" + | "25k-50k" + | "50k-100k" + | "100k-200k" + | ">200k" + | "prefer_not_to_say" + | null; + /** + * User-provided country of residence (e.g., ISO 3166-1 alpha-2 code) + * @example "US" + */ + country?: string | null; + /** + * User-provided age + * @format int32 + * @min 0 + * @example 35 + */ + age?: number | null; + /** + * User-provided: Does the user have children? 
+ * @example true + */ + hasKids?: boolean | null; + /** + * User-provided: User relationship status + * @example "married" + */ + relationshipStatus?: + | "single" + | "relationship" + | "married" + | "prefer_not_to_say" + | null; + /** + * User-provided: User employment status + * @example "employed" + */ + employmentStatus?: + | "employed" + | "unemployed" + | "student" + | "prefer_not_to_say" + | null; + /** + * User-provided: User education level + * @example "bachelors" + */ + educationLevel?: + | "high_school" + | "bachelors" + | "masters" + | "doctorate" + | "prefer_not_to_say" + | null; + /** Inferred: Does the user likely have children? (null if unknown or user provided) */ + inferredHasKids?: boolean | null; + /** Inferred: User relationship status (null if unknown or user provided) */ + inferredRelationshipStatus?: "single" | "relationship" | "married" | null; + /** Inferred: User employment status (null if unknown or user provided) */ + inferredEmploymentStatus?: "employed" | "unemployed" | "student" | null; + /** Inferred: User education level (null if unknown or user provided) */ + inferredEducationLevel?: + | "high_school" + | "bachelors" + | "masters" + | "doctorate" + | null; + /** Inferred: User gender identity (null if unknown or user provided) */ + inferredGender?: "male" | "female" | "non-binary" | null; +} + export interface RecentUserDataEntry { /** The unique ID of the userData entry. 
*/ _id?: string; diff --git a/web/src/components/auth/UserRegistrationForm.tsx b/web/src/components/auth/UserRegistrationForm.tsx index ad4951b..6975b77 100644 --- a/web/src/components/auth/UserRegistrationForm.tsx +++ b/web/src/components/auth/UserRegistrationForm.tsx @@ -3,23 +3,29 @@ import { Button, Checkbox, Label, - Modal, // Import Modal components + Modal, ModalHeader, ModalBody, ModalFooter, - Popover, // <-- Import Popover - Select, // <-- Import Select - TextInput, // <-- Import TextInput + Popover, + Select, + TextInput, } from "flowbite-react"; import { UserCreate } from "../../api/types/data-contracts"; import LoadingSpinner from "../common/LoadingSpinner"; -import { HiCheckCircle, HiInformationCircle } from "react-icons/hi"; // Import icons +import { HiCheckCircle, HiInformationCircle } from "react-icons/hi"; +import countryData from "../../data/countries.json"; interface UserRegistrationFormProps { onSubmit: (userData: UserCreate) => void; isLoading: boolean; } +interface CountryOption { + value: string; + label: string; +} + // Define options for selects const genderOptions = [ { value: "", label: "Select Gender (Optional)" }, @@ -39,6 +45,20 @@ const incomeOptions = [ { value: "prefer_not_to_say", label: "Prefer not to say" }, ]; +// --- Transform the imported country data object into an array --- +const typedCountryData: CountryOption[] = Object.entries(countryData).map( + ([code, name]) => ({ value: code, label: name }), +); +// Sort alphabetically by label (optional but good UX) +typedCountryData.sort((a, b) => a.label.localeCompare(b.label)); + +// --- Create the final country options array --- +const countryOptions: CountryOption[] = [ + { value: "", label: "Select Country (Optional)" }, + ...typedCountryData, // Spread the transformed array +]; +// --- End Country Options --- + export function UserRegistrationForm({ onSubmit, isLoading, @@ -49,6 +69,8 @@ export function UserRegistrationForm({ const [showConsentModal, setShowConsentModal] = 
useState(false); // State to track if consent has been explicitly accepted via the modal const [consentAccepted, setConsentAccepted] = useState(false); + // --- Add state for allowInference --- + const [allowInference, setAllowInference] = useState(true); // Default to true // Add state for demographic fields const [gender, setGender] = useState(null); @@ -67,13 +89,12 @@ export function UserRegistrationForm({ } const userData: UserCreate = { - // Use the state variable linked to the checkbox dataSharingConsent: dataSharingConsent, - preferences: [], // We can leave this empty for now - // Add demographic data, ensuring null if empty string or invalid number + allowInference: allowInference, // <-- Include allowInference + preferences: [], gender: gender || null, incomeBracket: incomeBracket || null, - country: country || null, + country: country || null, // country state is already used here age: age !== null && !isNaN(age) ? Number(age) : null, }; @@ -92,7 +113,17 @@ export function UserRegistrationForm({ setShowConsentModal(false); }; - // Content for the popover + // Define content for the inference popover + const inferencePopoverContent = ( +
+

+ Tapiro can estimate demographic details like age group or interests + based on your activity to improve personalization, even if you don't + provide them directly. You can disable this anytime. +

+
+ ); + const popoverContent = (

@@ -150,20 +181,27 @@ export function UserRegistrationForm({ ))}

- {/* Country Input */} + + {/* --- Country Select (Replaces TextInput) --- */}
- - Country +
+ {/* --- End Country Select --- */} + {/* Age Input */}
@@ -183,7 +221,8 @@ export function UserRegistrationForm({
{/* Consent Section */} -
+
+ {/* --- Data Sharing Consent (Existing) --- */} {/* Conditionally wrap Checkbox/Label in Popover */} {!consentAccepted ? ( @@ -224,6 +263,27 @@ export function UserRegistrationForm({
)} + {/* --- Allow Inference Toggle --- */} +
+ setAllowInference(e.target.checked)} + /> + +
+ {/* --- End Allow Inference Toggle --- */} + {/* Button to open the modal remains the same */}
diff --git a/web/src/data/countries.json b/web/src/data/countries.json new file mode 100644 index 0000000..6f5df5c --- /dev/null +++ b/web/src/data/countries.json @@ -0,0 +1,248 @@ +{ + "AF": "Afghanistan", + "AX": "Aland Islands", + "AL": "Albania", + "DZ": "Algeria", + "AS": "American Samoa", + "AD": "Andorra", + "AO": "Angola", + "AI": "Anguilla", + "AQ": "Antarctica", + "AG": "Antigua And Barbuda", + "AR": "Argentina", + "AM": "Armenia", + "AW": "Aruba", + "AU": "Australia", + "AT": "Austria", + "AZ": "Azerbaijan", + "BS": "Bahamas", + "BH": "Bahrain", + "BD": "Bangladesh", + "BB": "Barbados", + "BY": "Belarus", + "BE": "Belgium", + "BZ": "Belize", + "BJ": "Benin", + "BM": "Bermuda", + "BT": "Bhutan", + "BO": "Bolivia", + "BA": "Bosnia And Herzegovina", + "BW": "Botswana", + "BV": "Bouvet Island", + "BR": "Brazil", + "IO": "British Indian Ocean Territory", + "BN": "Brunei Darussalam", + "BG": "Bulgaria", + "BF": "Burkina Faso", + "BI": "Burundi", + "KH": "Cambodia", + "CM": "Cameroon", + "CA": "Canada", + "CV": "Cape Verde", + "KY": "Cayman Islands", + "CF": "Central African Republic", + "TD": "Chad", + "CL": "Chile", + "CN": "China", + "CX": "Christmas Island", + "CC": "Cocos (Keeling) Islands", + "CO": "Colombia", + "KM": "Comoros", + "CG": "Congo", + "CD": "Congo, Democratic Republic", + "CK": "Cook Islands", + "CR": "Costa Rica", + "CI": "Cote D'Ivoire", + "HR": "Croatia", + "CU": "Cuba", + "CY": "Cyprus", + "CZ": "Czech Republic", + "DK": "Denmark", + "DJ": "Djibouti", + "DM": "Dominica", + "DO": "Dominican Republic", + "EC": "Ecuador", + "EG": "Egypt", + "SV": "El Salvador", + "GQ": "Equatorial Guinea", + "ER": "Eritrea", + "EE": "Estonia", + "ET": "Ethiopia", + "FK": "Falkland Islands (Malvinas)", + "FO": "Faroe Islands", + "FJ": "Fiji", + "FI": "Finland", + "FR": "France", + "GF": "French Guiana", + "PF": "French Polynesia", + "TF": "French Southern Territories", + "GA": "Gabon", + "GM": "Gambia", + "GE": "Georgia", + "DE": "Germany", + "GH": "Ghana",
+ "GI": "Gibraltar", + "GR": "Greece", + "GL": "Greenland", + "GD": "Grenada", + "GP": "Guadeloupe", + "GU": "Guam", + "GT": "Guatemala", + "GG": "Guernsey", + "GN": "Guinea", + "GW": "Guinea-Bissau", + "GY": "Guyana", + "HT": "Haiti", + "HM": "Heard Island & Mcdonald Islands", + "VA": "Holy See (Vatican City State)", + "HN": "Honduras", + "HK": "Hong Kong", + "HU": "Hungary", + "IS": "Iceland", + "IN": "India", + "ID": "Indonesia", + "IR": "Iran, Islamic Republic Of", + "IQ": "Iraq", + "IE": "Ireland", + "IM": "Isle Of Man", + "IL": "Israel", + "IT": "Italy", + "JM": "Jamaica", + "JP": "Japan", + "JE": "Jersey", + "JO": "Jordan", + "KZ": "Kazakhstan", + "KE": "Kenya", + "KI": "Kiribati", + "KR": "Korea", + "KP": "North Korea", + "KW": "Kuwait", + "KG": "Kyrgyzstan", + "LA": "Lao People's Democratic Republic", + "LV": "Latvia", + "LB": "Lebanon", + "LS": "Lesotho", + "LR": "Liberia", + "LY": "Libyan Arab Jamahiriya", + "LI": "Liechtenstein", + "LT": "Lithuania", + "LU": "Luxembourg", + "MO": "Macao", + "MK": "Macedonia", + "MG": "Madagascar", + "MW": "Malawi", + "MY": "Malaysia", + "MV": "Maldives", + "ML": "Mali", + "MT": "Malta", + "MH": "Marshall Islands", + "MQ": "Martinique", + "MR": "Mauritania", + "MU": "Mauritius", + "YT": "Mayotte", + "MX": "Mexico", + "FM": "Micronesia, Federated States Of", + "MD": "Moldova", + "MC": "Monaco", + "MN": "Mongolia", + "ME": "Montenegro", + "MS": "Montserrat", + "MA": "Morocco", + "MZ": "Mozambique", + "MM": "Myanmar", + "NA": "Namibia", + "NR": "Nauru", + "NP": "Nepal", + "NL": "Netherlands", + "AN": "Netherlands Antilles", + "NC": "New Caledonia", + "NZ": "New Zealand", + "NI": "Nicaragua", + "NE": "Niger", + "NG": "Nigeria", + "NU": "Niue", + "NF": "Norfolk Island", + "MP": "Northern Mariana Islands", + "NO": "Norway", + "OM": "Oman", + "PK": "Pakistan", + "PW": "Palau", + "PS": "Palestinian Territory, Occupied", + "PA": "Panama", + "PG": "Papua New Guinea", + "PY": "Paraguay", + "PE": "Peru", + "PH": "Philippines", +
"PN": "Pitcairn", + "PL": "Poland", + "PT": "Portugal", + "PR": "Puerto Rico", + "QA": "Qatar", + "RE": "Reunion", + "RO": "Romania", + "RU": "Russian Federation", + "RW": "Rwanda", + "BL": "Saint Barthelemy", + "SH": "Saint Helena", + "KN": "Saint Kitts And Nevis", + "LC": "Saint Lucia", + "MF": "Saint Martin", + "PM": "Saint Pierre And Miquelon", + "VC": "Saint Vincent And Grenadines", + "WS": "Samoa", + "SM": "San Marino", + "ST": "Sao Tome And Principe", + "SA": "Saudi Arabia", + "SN": "Senegal", + "RS": "Serbia", + "SC": "Seychelles", + "SL": "Sierra Leone", + "SG": "Singapore", + "SK": "Slovakia", + "SI": "Slovenia", + "SB": "Solomon Islands", + "SO": "Somalia", + "ZA": "South Africa", + "GS": "South Georgia And Sandwich Isl.", + "ES": "Spain", + "LK": "Sri Lanka", + "SD": "Sudan", + "SR": "Suriname", + "SJ": "Svalbard And Jan Mayen", + "SZ": "Swaziland", + "SE": "Sweden", + "CH": "Switzerland", + "SY": "Syrian Arab Republic", + "TW": "Taiwan", + "TJ": "Tajikistan", + "TZ": "Tanzania", + "TH": "Thailand", + "TL": "Timor-Leste", + "TG": "Togo", + "TK": "Tokelau", + "TO": "Tonga", + "TT": "Trinidad And Tobago", + "TN": "Tunisia", + "TR": "Turkey", + "TM": "Turkmenistan", + "TC": "Turks And Caicos Islands", + "TV": "Tuvalu", + "UG": "Uganda", + "UA": "Ukraine", + "AE": "United Arab Emirates", + "GB": "United Kingdom", + "US": "United States", + "UM": "United States Outlying Islands", + "UY": "Uruguay", + "UZ": "Uzbekistan", + "VU": "Vanuatu", + "VE": "Venezuela", + "VN": "Vietnam", + "VG": "Virgin Islands, British", + "VI": "Virgin Islands, U.S.", + "WF": "Wallis And Futuna", + "EH": "Western Sahara", + "YE": "Yemen", + "ZM": "Zambia", + "ZW": "Zimbabwe" +} diff --git a/web/src/pages/UserDashboard/UserDashboard.tsx b/web/src/pages/UserDashboard/UserDashboard.tsx index 2a3c7ce..2ecee40 100644 --- a/web/src/pages/UserDashboard/UserDashboard.tsx +++ b/web/src/pages/UserDashboard/UserDashboard.tsx @@ -146,7 +146,6 @@ const renderCustomizedLabel = ({ outerRadius, 
percent, }: CustomizedLabelProps) => { - // Use the defined interface const radius = innerRadius + (outerRadius - innerRadius) * 0.5; const x = cx + radius * Math.cos(-midAngle * RADIAN); const y = cy + radius * Math.sin(-midAngle * RADIAN); @@ -765,25 +764,25 @@ export default function UserDashboard() { diff --git a/web/src/pages/UserDashboard/UserPreferencesPage.tsx b/web/src/pages/UserDashboard/UserPreferencesPage.tsx index 000251b..ea805c3 100644 --- a/web/src/pages/UserDashboard/UserPreferencesPage.tsx +++ b/web/src/pages/UserDashboard/UserPreferencesPage.tsx @@ -1,4 +1,3 @@ -import React, { useState, useEffect, useMemo } from "react"; import { Card, Button, @@ -12,42 +11,58 @@ import { TextInput, Select, RangeSlider, - List, // Import List - ListItem, // Import ListItem + List, + ListItem, } from "flowbite-react"; import { - HiInformationCircle, - HiPencil, - HiTrash, - HiPlus, HiUser, - HiSparkles, HiOutlineUserCircle, HiOutlineCake, HiOutlineGlobeAlt, HiOutlineCash, - // --- End Add icons --- -} from "react-icons/hi"; -import { useForm, Controller, SubmitHandler } from "react-hook-form"; + HiOutlineAcademicCap, // <-- Icon for education + HiOutlineBriefcase, // <-- Icon for employment + HiOutlineUsers, // <-- Icon for relationship + HiOutlineHeart, // <-- Icon for hasKids + HiInformationCircle, + HiSparkles, + HiPlus, + HiPencil, + HiTrash, + HiCheck, + HiX, +} from "react-icons/hi"; // <-- Add new icons +import { useForm, SubmitHandler, Controller } from "react-hook-form"; +import { useEffect, useState, useMemo } from "react"; import { useUserProfile, - useUpdateUserProfile, useUserPreferences, + useUpdateUserProfile, useUpdateUserPreferences, -} from "../../api/hooks/useUserHooks"; -import { useTaxonomy } from "../../api/hooks/useTaxonomyHooks"; -import LoadingSpinner from "../../components/common/LoadingSpinner"; -import ErrorDisplay from "../../components/common/ErrorDisplay"; +} from "../../api/hooks/useUserHooks"; // <-- Corrected import path +import 
{ useTaxonomy } from "../../api/hooks/useTaxonomyHooks"; // <-- Corrected import path import { UserUpdate, PreferenceItem, TaxonomyCategory, + DemographicData, // <-- Import DemographicData type } from "../../api/types/data-contracts"; +import LoadingSpinner from "../../components/common/LoadingSpinner"; +import ErrorDisplay from "../../components/common/ErrorDisplay"; +import countryData from "../../data/countries.json"; // --- Form Types --- +// Update to include all user-editable demographic fields type DemographicsFormData = Pick< - UserUpdate, - "gender" | "age" | "country" | "incomeBracket" + DemographicData, // Use DemographicData type directly + | "gender" + | "age" + | "country" + | "incomeBracket" + | "hasKids" + | "relationshipStatus" + | "employmentStatus" + | "educationLevel" >; type PreferenceFormData = { category: string; @@ -55,12 +70,90 @@ type PreferenceFormData = { attributes?: Record; }; -// --- Mini Demographic Card Component (Add this) --- +// --- Define options for selects --- +const genderOptions = [ + { value: "", label: "Select Gender" }, + { value: "male", label: "Male" }, + { value: "female", label: "Female" }, + { value: "non-binary", label: "Non-binary" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; + +const incomeOptions = [ + { value: "", label: "Select Income Bracket" }, // Make placeholder less optional here + { value: "<25k", label: "< $25,000" }, + { value: "25k-50k", label: "$25,000 - $49,999" }, + { value: "50k-100k", label: "$50,000 - $99,999" }, + { value: "100k-200k", label: "$100,000 - $199,999" }, + { value: ">200k", label: "> $200,000" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; + +// --- NEW Options --- +const relationshipOptions = [ + { value: "", label: "Select Relationship Status" }, + { value: "single", label: "Single" }, + { value: "relationship", label: "In a relationship" }, + { value: "married", label: "Married" }, + { value: "prefer_not_to_say", label: "Prefer not 
to say" }, +]; + +const employmentOptions = [ + { value: "", label: "Select Employment Status" }, + { value: "employed", label: "Employed" }, + { value: "unemployed", label: "Unemployed" }, + { value: "student", label: "Student" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; + +const educationOptions = [ + { value: "", label: "Select Education Level" }, + { value: "high_school", label: "High School" }, + { value: "bachelors", label: "Bachelor's Degree" }, + { value: "masters", label: "Master's Degree" }, + { value: "doctorate", label: "Doctorate" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, +]; +// --- End NEW Options --- + +// --- Has Kids Options --- +const hasKidsOptions = [ + { value: "", label: "Select an option" }, // Default/unset option + { value: "true", label: "Yes" }, + { value: "false", label: "No" }, + { value: "prefer_not_to_say", label: "Prefer not to say" }, // Represented by null in the data +]; +// --- End Has Kids Options --- + +// --- Country Options (copied from UserRegistrationForm) --- +interface CountryOption { + value: string; + label: string; +} +const typedCountryData: CountryOption[] = Object.entries(countryData).map( + ([code, name]) => ({ value: code, label: name }), +); +typedCountryData.sort((a, b) => a.label.localeCompare(b.label)); +const countryOptions: CountryOption[] = [ + { value: "", label: "Select Country" }, // Make placeholder less optional here + ...typedCountryData, +]; +// --- End Country Options --- + +// --- Mini Demographic Card Component --- +// filepath: /Users/cdevmina/Projects/Tapiro/web/src/pages/UserDashboard/UserPreferencesPage.tsx interface DemoInfoCardProps { icon: React.ElementType; label: string; value: string | number | null | undefined; - isLoading?: boolean; // Optional loading state if needed later + isLoading?: boolean; + isInferred?: boolean; // Added: Flag for inferred data + fieldName?: keyof DemographicsFormData; // Added: Field name for verification + 
onVerify?: ( + fieldName: keyof DemographicsFormData, + // --- CHANGE HERE --- + valueToVerify: string | number | boolean | null | undefined, + ) => void; // Added: Handler for verify button } const DemoInfoCard: React.FC = ({ @@ -68,6 +161,9 @@ const DemoInfoCard: React.FC = ({ label, value, isLoading, + isInferred, // Destructure + fieldName, // Destructure + onVerify, // Destructure }) => (
@@ -78,9 +174,30 @@ const DemoInfoCard: React.FC = ({ {isLoading ? ( ) : ( -

- {value || "Not set"} -

+
+ {" "} + {/* Wrap value and button */} +

+ {value || "Not set"} + {isInferred && + value && ( // Show "(inferred)" text + + (inferred) + + )} +

+ {/* Show Verify button if inferred, has value, not loading, and handler provided */} + {isInferred && value && !isLoading && fieldName && onVerify && ( + + )} +
)}
@@ -88,7 +205,7 @@ const DemoInfoCard: React.FC = ({ // --- End Mini Demographic Card Component --- const UserPreferencesPage: React.FC = () => { - // --- Data Fetching --- + // --- Data Fetching (Keep existing) --- const { data: userProfile, isLoading: profileLoading, @@ -105,7 +222,7 @@ const UserPreferencesPage: React.FC = () => { error: taxonomyError, } = useTaxonomy(); - // --- Mutations --- + // --- Mutations (Keep existing) --- const { mutate: updateProfile, isPending: isUpdatingProfile, @@ -117,7 +234,7 @@ const UserPreferencesPage: React.FC = () => { error: updatePreferencesError, } = useUpdateUserPreferences(); - // --- State --- + // --- State (Keep existing) --- const [isEditingDemographics, setIsEditingDemographics] = useState(false); const [showPreferenceModal, setShowPreferenceModal] = useState(false); const [editingPreferenceIndex, setEditingPreferenceIndex] = useState< @@ -129,7 +246,8 @@ const UserPreferencesPage: React.FC = () => { register: registerDemo, handleSubmit: handleDemoSubmit, reset: resetDemoForm, - formState: { isDirty: isDemoDirty }, + setValue: setValueDemo, // <-- Get setValue for verification + formState: { isDirty: isDemoDirty, errors: demoErrors }, // <-- Add errors } = useForm(); const { @@ -143,15 +261,19 @@ const UserPreferencesPage: React.FC = () => { defaultValues: { category: "", attributes: {}, score: 50 }, }); - // --- Effects --- - // Reset demographics form when profile loads or editing starts/stops + // Update useEffect to reset ALL demographic fields when NOT editing useEffect(() => { - if (userProfile && !isEditingDemographics) { + if (userProfile?.demographicData && !isEditingDemographics) { resetDemoForm({ - gender: userProfile.gender || "", - age: userProfile.age || undefined, - country: userProfile.country || "", - incomeBracket: userProfile.incomeBracket || "", + gender: userProfile.demographicData.gender ?? null, // Use ?? null + age: userProfile.demographicData.age ?? undefined, // Use ?? 
for null/undefined + country: userProfile.demographicData.country ?? null, // Use ?? null + incomeBracket: userProfile.demographicData.incomeBracket ?? null, // Use ?? null + hasKids: userProfile.demographicData.hasKids ?? null, // Default to null + relationshipStatus: + userProfile.demographicData.relationshipStatus ?? null, // Use ?? null + employmentStatus: userProfile.demographicData.employmentStatus ?? null, // Use ?? null + educationLevel: userProfile.demographicData.educationLevel ?? null, // Use ?? null }); } }, [userProfile, isEditingDemographics, resetDemoForm]); @@ -168,9 +290,9 @@ const UserPreferencesPage: React.FC = () => { const formAttributes: Record = {}; if (pref.attributes) { Object.entries(pref.attributes).forEach(([key, valueObj]) => { + // Attempt to get the first key if it's an object like { "Blue": 1.0 } let displayValue: string | undefined = undefined; if (typeof valueObj === "object" && valueObj !== null) { - // Attempt to get the first key if it's an object like { "Blue": 1.0 } const firstKey = Object.keys(valueObj)[0]; if (firstKey) displayValue = firstKey; } else if (typeof valueObj === "string") { @@ -235,18 +357,51 @@ const UserPreferencesPage: React.FC = () => { }, [selectedCategoryId, attributeMap]); // --- Handlers --- + // Update onDemoSubmit to handle all fields and nest payload const onDemoSubmit: SubmitHandler = (data) => { - // Filter out empty strings before sending - const payload: Partial = {}; - if (data.gender) payload.gender = data.gender; - if (data.age) payload.age = data.age; - if (data.country) payload.country = data.country; - if (data.incomeBracket) payload.incomeBracket = data.incomeBracket; - - updateProfile(payload as UserUpdate, { - // Cast as UserUpdate - onSuccess: () => setIsEditingDemographics(false), - }); + // Construct the nested demographicData payload + const demoPayload: Partial = {}; + + // Handle each field, setting to null if empty/default + demoPayload.gender = data.gender ? 
data.gender : null; + demoPayload.age = + data.age !== undefined && data.age !== null && !isNaN(data.age) + ? Number(data.age) + : null; + demoPayload.country = data.country ? data.country : null; + demoPayload.incomeBracket = data.incomeBracket ? data.incomeBracket : null; + // Handle boolean (null is allowed) + // Ensure undefined from form becomes null for API + demoPayload.hasKids = data.hasKids === undefined ? null : data.hasKids; + demoPayload.relationshipStatus = data.relationshipStatus + ? data.relationshipStatus + : null; + demoPayload.employmentStatus = data.employmentStatus + ? data.employmentStatus + : null; + demoPayload.educationLevel = data.educationLevel + ? data.educationLevel + : null; + + // Construct the final UserUpdate payload + const finalPayload: UserUpdate = { + demographicData: demoPayload, + }; + + console.log("Submitting demographic update:", finalPayload); // Debug log + + // Only submit if the form is dirty (React Hook Form tracks this) + if (isDemoDirty) { + updateProfile(finalPayload, { + onSuccess: () => setIsEditingDemographics(false), + onError: (err) => { + console.error("Profile update failed:", err); // Log error + }, + }); + } else { + // No changes detected, just exit edit mode + setIsEditingDemographics(false); + } }; const onPrefSubmit: SubmitHandler = (data) => { @@ -308,7 +463,66 @@ const UserPreferencesPage: React.FC = () => { setEditingPreferenceIndex(index); setShowPreferenceModal(true); }; + const handleVerify = ( + fieldName: keyof DemographicsFormData, + // --- CHANGE HERE --- + valueToVerify: string | number | boolean | null | undefined, + ) => { + setIsEditingDemographics(true); + // Use timeout to ensure state update completes before setting value + setTimeout(() => { + // Convert boolean "Yes"/"No" back to boolean for ToggleSwitch + // --- FIX: Handle potential undefined from valueToVerify --- + let formValue: string | number | boolean | null = valueToVerify ?? 
null; + + // Find the corresponding value for enum fields OR hasKids + if (fieldName === "hasKids") { + // Find the option matching the display label + const option = hasKidsOptions.find((o) => o.label === valueToVerify); + // Convert the option's string value back to boolean/null + formValue = + option?.value === "true" + ? true + : option?.value === "false" + ? false + : null; + } else if (fieldName === "gender") + formValue = + genderOptions.find((o) => o.label === valueToVerify)?.value ?? null; + else if (fieldName === "country") + formValue = + countryOptions.find((o) => o.label === valueToVerify)?.value ?? null; + else if (fieldName === "incomeBracket") + formValue = + incomeOptions.find((o) => o.label === valueToVerify)?.value ?? null; + else if (fieldName === "relationshipStatus") + formValue = + relationshipOptions.find((o) => o.label === valueToVerify)?.value ?? + null; + else if (fieldName === "employmentStatus") + formValue = + employmentOptions.find((o) => o.label === valueToVerify)?.value ?? + null; + else if (fieldName === "educationLevel") + formValue = + educationOptions.find((o) => o.label === valueToVerify)?.value ?? + null; + // Ensure age is treated as a number or null for the form + else if (fieldName === "age") { + // Use valueToVerify here as formValue might already be null + formValue = typeof valueToVerify === "number" ? 
valueToVerify : null; + } + + // Use setValueDemo with the potentially transformed formValue + setValueDemo(fieldName, formValue, { shouldDirty: true }); + // Optional: Focus the element after setting value + const element = document.getElementById(fieldName); + element?.focus(); + element?.scrollIntoView({ behavior: "smooth", block: "center" }); + }, 0); + console.log(`Verifying ${fieldName} with value:`, valueToVerify); + }; // --- Render Logic --- const isLoading = profileLoading || preferencesLoading || taxonomyLoading; const error = profileError || preferencesError || taxonomyError; @@ -328,14 +542,31 @@ const UserPreferencesPage: React.FC = () => { ); } + // Helper to format boolean/null + const formatBoolean = (value: boolean | null | undefined): string => { + if (value === true) return "Yes"; + if (value === false) return "No"; + return "Not set"; + }; + + // Helper to format enum values + const formatEnum = ( + value: string | null | undefined, + options: { value: string; label: string }[], + ): string => { + const found = options.find((opt) => opt.value === value); + return found?.label || value || "Not set"; + }; + return (

Manage Your Profile & Interests

-
- {/* --- Demographics Section (Left Column on Large Screens) --- */} + {/* wrap both cards in a grid */} +
+ {/* --- Demographics Section (Left) */}

@@ -344,7 +575,7 @@ const UserPreferencesPage: React.FC = () => {

{!isEditingDemographics && ( -
) : ( -
+ // --- DISPLAY VIEW (Combined User-Provided and Inferred) --- +
+ {/* User Provided or Verified */} + + + + + + {/* Display inferred values ONLY if user hasn't provided one */} + {!userProfile?.demographicData?.gender && + userProfile?.demographicData?.inferredGender && ( + + )} + {!userProfile?.demographicData?.hasKids && + userProfile?.demographicData?.inferredHasKids !== null && ( + + )} + {!userProfile?.demographicData?.relationshipStatus && + userProfile?.demographicData?.inferredRelationshipStatus && ( + + )} + {!userProfile?.demographicData?.employmentStatus && + userProfile?.demographicData?.inferredEmploymentStatus && ( + + )} + {!userProfile?.demographicData?.educationLevel && + userProfile?.demographicData?.inferredEducationLevel && ( + + )}
)} - {/* --- Preferences Section (Right Column on Large Screens) --- */} + {/* --- Your Interests Card (Right) */}

@@ -474,77 +983,61 @@ const UserPreferencesPage: React.FC = () => {
{preferencesData?.preferences && preferencesData.preferences.length > 0 ? ( - + {preferencesData.preferences.map((pref, index) => { - // Extract attribute display logic - const attributesDisplay = - pref.attributes && - Object.entries(pref.attributes).map(([key, valueObj]) => { - let displayValue = "[Complex Value]"; - if (typeof valueObj === "object" && valueObj !== null) { - const firstKey = Object.keys(valueObj)[0]; - if (firstKey) displayValue = firstKey; - } else if (typeof valueObj === "string") { - displayValue = valueObj; - } - return { key, displayValue }; - }); + const categoryName = + categoryMap.get(pref.category)?.name || pref.category; + const attributeEntries = Object.entries( + pref.attributes || {}, + ); return ( -
- - {categoryMap.get(pref.category || "")?.name || - "Unknown Category"} - - - Score:{" "} - {pref.score !== null && pref.score !== undefined - ? Math.round(pref.score * 100) - : "N/A"} - - {attributesDisplay && attributesDisplay.length > 0 && ( -
- {attributesDisplay.map(({ key, displayValue }) => ( - - - {attributeMap - .get(pref.category || "") - ?.get(key) || key} - : - {" "} - {displayValue} - - ))} -
- )} -
-
- - +
+
+

+ {categoryName} +

+

+ Score:{" "} + {pref.score !== null && pref.score !== undefined + ? `${Math.round(pref.score * 100)}%` + : "N/A"} +

+ {attributeEntries.length > 0 && ( +
+ Attributes:{" "} + {attributeEntries + .map(([attrKey, valueObj]) => { + // Get the first key (value) from the inner object + const attrValue = Object.keys(valueObj)[0]; + return `${attrKey}: ${attrValue}`; + }) + .join(", ")} +
+ )} +
+
+ + +
); @@ -552,8 +1045,7 @@ const UserPreferencesPage: React.FC = () => { ) : (

- You haven't added any specific interests yet. Click "Add - Interest" to get started. + You haven't added any interests yet.

)}
@@ -589,6 +1081,7 @@ const UserPreferencesPage: React.FC = () => { Select a category... {Array.from(categoryMap.values()) + .filter((cat) => cat.id) // Ensure category has an ID .sort((a, b) => a.name.localeCompare(b.name)) .map((cat) => (
diff --git a/web/src/pages/UserDashboard/UserProfilePage.tsx b/web/src/pages/UserDashboard/UserProfilePage.tsx index c3a9be3..1eca662 100644 --- a/web/src/pages/UserDashboard/UserProfilePage.tsx +++ b/web/src/pages/UserDashboard/UserProfilePage.tsx @@ -14,6 +14,7 @@ import { Modal, // Import Modal ModalBody, ModalHeader, + Tooltip, // <-- Import Tooltip } from "flowbite-react"; // Import necessary icons import { @@ -24,6 +25,7 @@ import { HiX, HiTrash, // Import Trash icon for Delete HiExclamation, // Import Exclamation icon for Modal + HiInformationCircle, // <-- Import for Tooltip } from "react-icons/hi"; import { useUserProfile, @@ -40,7 +42,7 @@ type UserProfileFormData = { username?: string; phone?: string; privacySettings_dataSharingConsent?: boolean; - privacySettings_anonymizeData?: boolean; + privacySettings_allowInference?: boolean; // <-- Add allowInference }; export default function UserProfilePage() { @@ -79,7 +81,7 @@ export default function UserProfilePage() { username: "", phone: "", privacySettings_dataSharingConsent: false, - privacySettings_anonymizeData: false, + privacySettings_allowInference: true, // <-- Default to true }, }); @@ -91,8 +93,8 @@ export default function UserProfilePage() { phone: userProfile.phone || "", privacySettings_dataSharingConsent: userProfile.privacySettings?.dataSharingConsent ?? false, - privacySettings_anonymizeData: - userProfile.privacySettings?.anonymizeData ?? false, + privacySettings_allowInference: + userProfile.privacySettings?.allowInference ?? true, }); } }, [userProfile, reset]); @@ -156,8 +158,8 @@ export default function UserProfilePage() { username: data.username, phone: data.phone, privacySettings: { - dataSharingConsent: data.privacySettings_dataSharingConsent, - anonymizeData: data.privacySettings_anonymizeData, + dataSharingConsent: data.privacySettings_dataSharingConsent ?? false, + allowInference: data.privacySettings_allowInference ?? 
true, // Default to true if undefined }, }; updateUser(updatePayload); @@ -303,29 +305,57 @@ export default function UserProfilePage() { Privacy Settings

{/* Data Sharing Consent */} - ( - - )} - /> - {/* Anonymize Data */} - ( - - )} - /> +
+ {" "} + {/* Wrap ToggleSwitch and HelperText */} + ( + + )} + /> + {/* Add HelperText or simple div below */} +
+ Controls sharing preferences with specific stores.{" "} + + + +
+
+ + {/* --- Allow Inference Toggle --- */} +
+ {" "} + {/* Wrap ToggleSwitch and HelperText */} + ( + + )} + /> + {/* Add HelperText or simple div below */} +
+ Allow Tapiro to infer demographic insights based on your + activity.{" "} + + + +
+
+ {/* --- End Allow Inference Toggle --- */} + {/* Save Button - Common for all tabs within the form */} - {/* Security Tab */} + {/* Security Tab (Existing) */}
{" "}