diff --git a/controllers/job.py b/controllers/job.py
index b0641ce..eeaf15c 100644
--- a/controllers/job.py
+++ b/controllers/job.py
@@ -6,11 +6,12 @@
 import os
 from function.utils import scrape_job_link
 from function.crawler.job_portals import scrape_ycombinator_jobpage, scrape_linkedin_jobpage
-from function.job_expires.job_expirations import run_job_expiration
+from function.job_expires.job_expirations import expire_sudden_jobs
 from utils.functions import checkExistingJob
 from middleware import protect_routes
 from functools import wraps
 from datetime import datetime, timedelta
+import urllib.parse

 scraperapi_key = os.getenv('SCRAPER_API')

@@ -48,6 +49,7 @@ async def create_jobs():
         await db.disconnect()
 @job_blueprint.route('/get', methods=['GET'])
+# Get jobs based on source and title
 async def get_job():
     try:
         if not db.is_connected():
             await db.connect()
@@ -92,17 +94,21 @@ async def get_job():


 @job_blueprint.route('/get/id', methods=['GET'])
+# Get job by id
 async def getJobId():
     try:
         if not db.is_connected():
             await db.connect()

-        jobId = request.args.get('jobId', default=1, type=int)
+        jobId = request.args.get('jobId', default=1, type=str)

         # Fetch jobs from the database including the company relation
         job = await db.job.find_unique(
             where={"id": jobId},
+            include={'company': True}
         )
+        if not job:
+            return jsonify({'error': 'Job not found'}), 404

         # Serialize the job data
         serialized_job = serialize_job(job)
@@ -110,7 +116,7 @@ async def getJobId():
         return jsonify({'job': serialized_job }), 200

     except Exception as e:
-        print(e, "here is the error")  # Output the error to the console for debugging
+        print(e, "here is the error")
         return jsonify({'error': str(e)}), 500

     finally:
@@ -118,41 +124,78 @@ async def getJobId():
         await db.disconnect()

 @job_blueprint.route('/get/company/list', methods=['GET'])
-async def get_companies_list():
+# Get companies list
+async def get_companies_list():
     try:
         if not db.is_connected():
             await db.connect()

-        # Fetch jobs from the database including the company relation
-        companies = await db.company.find_many()
+        page = request.args.get('page', default=1, type=int)
+        page_size = request.args.get('page_size', default=10, type=int)
+        source = request.args.get('source', default=None, type=str)
+
+        if page < 1:
+            return jsonify({'error': "Page must be a positive number"}), 400
+
+        skip = (page - 1) * page_size
+
+        filter = {}
+        if source:
+            filter['source'] = source
+
+        companies = await db.company.find_many(
+            where=filter,
+            skip=skip,
+            take=page_size
+        )
+
+        total_count = await db.company.count(
+            where={
+                'source': source
+            }
+        )

         serialized_companies = [company.model_dump() for company in companies]
-        return jsonify({'companies': serialized_companies}), 200
+
+        return jsonify({
+            'companies': serialized_companies,
+            'page': page,
+            'page_size': page_size,
+            'total_count': total_count
+        }), 200

     except Exception as e:
-        print(e, "here is the error")  # Output the error to the console for debugging
+        print(f"Error in get_companies_list: {e}")
         return jsonify({'error': str(e)}), 500
-
+
     finally:
-        # Disconnect Prisma client
         await db.disconnect()

 @job_blueprint.route('/scrape', methods=['GET'])
+# Scrape job from a given link
 @protect_route()
 async def scrape_job():
     try:
+        if not db.is_connected():
+            await db.connect()
         portal = request.args.get("portal", default='', type=str)
         job_link = request.args.get("job_link", default='', type=str)
         print(job_link, portal, "here is info")

+        parsed_url = urllib.parse.urlparse(job_link)
+        pure_link = urllib.parse.urlunparse(parsed_url._replace(query=''))
+
+        existing_job = await db.job.find_first(where={"job_link": pure_link})
+        if existing_job:
+            return jsonify({"message": "Job already exists in database"}), 409

-        soup = await scrape_job_link(job_link, portal)
+        soup = await scrape_job_link(pure_link, portal)

         jobdata = {}

-        if portal == 'ycombinator':
-            print(portal)
-            jobdata = await scrape_ycombinator_jobpage(soup, job_link)
+        # if portal == 'ycombinator':
+        #     print(portal)
+        #     jobdata = await scrape_ycombinator_jobpage(soup, job_link)

         # elif portal == 'glassdoor':
         #     print(portal)
@@ -162,9 +205,9 @@ async def scrape_job():
         #     print(portal)
         #     # jobdata = await scrape_indeed(soup)

-        elif portal == 'linkedin':
-            print(portal)
-            jobdata = await scrape_linkedin_jobpage(soup, job_link)
+        # elif portal == 'linkedin':
+        #     print(portal)
+        #     jobdata = await scrape_linkedin_jobpage(soup, job_link)

         # # elif portal == 'internshala':
         # #     await scrape_internshala(soup)
@@ -187,18 +230,21 @@ async def scrape_job():
     except Exception as e:
         print(e, "here is the error")  # Output the error to the console for debugging
         return jsonify({'error': str(e)}), 500
+    finally:
+        await db.disconnect()


 @job_blueprint.route('/expire', methods=['GET'])
 # @protect_route()
+# Check jobs & expire them
 async def expire_jobs():
     try:
         if not db.is_connected():
             await db.connect()

-        print("Running scheduled job expiration")
-        # Run the job expiration process
-        expired_count = await run_job_expiration()
+        print("Expiring sudden jobs")
+        # Expire jobs that are suddenly reported by a user.
+        expired_count = await expire_sudden_jobs()

         return jsonify({
             'success': True,
@@ -214,44 +260,35 @@ async def expire_jobs():
         await db.disconnect()

 @job_blueprint.route('/stats', methods=['GET'])
+# Get job statistics
 async def get_job_stats():
     try:
         if not db.is_connected():
             await db.connect()
-
-        # Get total jobs
-        total_jobs = await db.job.count()
-
-        # Get active jobs
-        active_jobs = await db.job.count(
-            where={
-                "status": "active"
-            }
-        )

-        # Get jobs with end_date in the past
         current_date = datetime.now()
+        thirty_days_ago = current_date - timedelta(days=30)
+
+        total_jobs = await db.job.count()
+        active_jobs = await db.job.count(where={"status": "active"})
         expired_end_date = await db.job.count(
-            where={
-                "end_date": {"lte": current_date},
-                "status": "active"
-            }
+            where={"end_date": {"lte": current_date}, "status": "active"}
         )
-
-        # Get jobs older than 30 days
-        thirty_days_ago = current_date - timedelta(days=30)
         old_jobs = await db.job.count(
-            where={
-                "posted": {"lte": thirty_days_ago},
-                "status": "active"
-            }
+            where={"posted": {"lte": thirty_days_ago}, "status": "active"}
         )
+        jobs_by_source = await db.job.group_by(["source"])
+        jobs_by_source_dict = {}
+        for group in jobs_by_source:
+            count = await db.job.count(where={"source": group["source"]})
+            jobs_by_source_dict[group["source"]] = count

         return jsonify({
             'total_jobs': total_jobs,
             'active_jobs': active_jobs,
             'jobs_with_expired_end_date': expired_end_date,
             'jobs_older_than_30_days': old_jobs,
+            'jobs_by_source': jobs_by_source_dict,
             'current_time': current_date.isoformat(),
             'thirty_days_ago': thirty_days_ago.isoformat()
         }), 200
diff --git a/function/insert_job.py b/function/insert_job.py
index 9b96b32..363fd7b 100644
--- a/function/insert_job.py
+++ b/function/insert_job.py
@@ -30,11 +30,20 @@ def to_lowercase(value):
                 data={
                     "company_name": company_name,
                     "company_logo": job.get('company_logo'),
-                    "description": job.get('company_desc')
+                    "description": job.get('company_desc'),
+                    "source": [job.get('source')] if job.get('source') else []
                 }
             )
         except UniqueViolationError:
             # Handle race condition where another insert happened
             company = await db.company.find_unique(where={'company_name': company_name})
+    elif job.get('source'):
+        current_sources = company.source or []
+        if job['source'] not in current_sources:
+            updated_sources = current_sources + [job['source']]
+            company = await db.company.update(
+                where={'id': company.id},
+                data={'source': updated_sources}
+            )
     # Ensure company exists
     if not company:
diff --git a/function/job_expires/job_expirations.py b/function/job_expires/job_expirations.py
index a2ac813..d20e036 100644
--- a/function/job_expires/job_expirations.py
+++ b/function/job_expires/job_expirations.py
@@ -149,3 +149,24 @@ async def run_job_expiration():
     except Exception as e:
         logger.error(f"Error in run_job_expiration: {str(e)}")
         return 0
+
+
+async def expire_sudden_jobs():
+    """
+    Expire jobs that were reported by a user.
+    """
+    try:
+        if not db.is_connected():
+            await db.connect()
+
+        # Add logic to expire jobs that are suddenly reported by a user,
+        # by sending an HTTP request to the job page.
+        hello = 0
+
+        return 0
+    except Exception as e:
+        logger.error(f"Error in expire_sudden_jobs: {str(e)}")
+        return 0
+    finally:
+        if db.is_connected():
+            await db.disconnect()
diff --git a/prisma/migrations/20250327092937_/migration.sql b/prisma/migrations/20250327092937_/migration.sql
new file mode 100644
index 0000000..dfe213f
--- /dev/null
+++ b/prisma/migrations/20250327092937_/migration.sql
@@ -0,0 +1,2 @@
+-- DropEnum
+DROP TYPE "JobStatus";
diff --git a/prisma/migrations/20250327093137_added_job_indexes/migration.sql b/prisma/migrations/20250327093137_added_job_indexes/migration.sql
new file mode 100644
index 0000000..e936e85
--- /dev/null
+++ b/prisma/migrations/20250327093137_added_job_indexes/migration.sql
@@ -0,0 +1,17 @@
+-- AlterTable
+ALTER TABLE "Company" ADD COLUMN "source" TEXT[];
+
+-- CreateIndex
+CREATE INDEX "Job_title_idx" ON "Job"("title");
+
+-- CreateIndex
+CREATE INDEX "Job_job_location_idx" ON "Job"("job_location");
+
+-- CreateIndex
+CREATE INDEX "Job_salary_min_idx" ON "Job"("salary_min");
+
+-- CreateIndex
+CREATE INDEX "Job_salary_max_idx" ON "Job"("salary_max");
+
+-- CreateIndex
+CREATE INDEX "Job_status_idx" ON "Job"("status");
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 07c249c..93fff3b 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -63,6 +63,7 @@ model Company {
   company_logo String?
   socials      Json? // JSON field to store all social media links
   last_job_ids Json? // Added this for storing job Ids
+  source       String[]
   jobs         Job[]

   @@unique([company_name], map: "UniqueCompanyNameConstraint")
@@ -95,4 +96,9 @@ model Job {
   trackedJobs Tracked_Jobs[]

   @@unique([title, job_id, companyId], map: "UniqueJobTitleCompany")
+  @@index([title]) // Index for title searches
+  @@index([job_location]) // Index for location filters
+  @@index([salary_min]) // Index for salary range filters
+  @@index([salary_max]) // Index for salary range filters
+  @@index([status]) // Index for status filters
 }
diff --git a/utils/serialize_data.py b/utils/serialize_data.py
index 63d149e..a0ab737 100644
--- a/utils/serialize_data.py
+++ b/utils/serialize_data.py
@@ -2,14 +2,26 @@ def serialize_job(job):
     return {
         'id': job.id,
         'title': job.title,
+        'job_link': job.job_link,
+        'job_type': job.job_type,
+        'job_id': job.job_id,
+        'job_location': job.job_location,
+        'salary_min': job.salary_min,
+        'salary_max': job.salary_max,
+        'job_salary': job.job_salary,
+        'experience_min': job.experience_min,
+        'experience_max': job.experience_max,
+        'experience': job.experience,
+        'job_description': job.job_description,
+        'skills_required': job.skills_required,
+        'source': job.source,
+        'source_logo': job.source_logo,
+        'posted': job.posted,
+        'end_date': job.end_date,
         'company': {
+            'id': job.company.id,
             'name': job.company.company_name,
             'logo': job.company.company_logo,
-            'description': job.company.description
         },
-        'job_location': job.job_location,
-        'job_type': job.job_type,
-        'job_salary': job.job_salary,
-        'job_link': job.job_link,
-        'source': job.source
+        'status': job.status,
     }