From cb67011824a71235ad1ea2ce059d826ccca727f9 Mon Sep 17 00:00:00 2001
From: Ramesh chandra pola <114871196+rameshchandra8520@users.noreply.github.com>
Date: Sat, 29 Mar 2025 11:11:08 +0530
Subject: [PATCH] Revert "Fixed get job apis"

---
 controllers/job.py                           | 121 ++++++------------
 function/insert_job.py                       |  11 +-
 function/job_expires/job_expirations.py      |  21 ---
 .../migrations/20250327092937_/migration.sql |   2 -
 .../migration.sql                            |  17 ---
 prisma/schema.prisma                         |   6 -
 utils/serialize_data.py                      |  24 +---
 7 files changed, 49 insertions(+), 153 deletions(-)
 delete mode 100644 prisma/migrations/20250327092937_/migration.sql
 delete mode 100644 prisma/migrations/20250327093137_added_job_indexes/migration.sql

diff --git a/controllers/job.py b/controllers/job.py
index eeaf15c..b0641ce 100644
--- a/controllers/job.py
+++ b/controllers/job.py
@@ -6,12 +6,11 @@
 import os
 from function.utils import scrape_job_link
 from function.crawler.job_portals import scrape_ycombinator_jobpage, scrape_linkedin_jobpage
-from function.job_expires.job_expirations import expire_sudden_jobs
+from function.job_expires.job_expirations import run_job_expiration
 from utils.functions import checkExistingJob
 from middleware import protect_routes
 from functools import wraps
 from datetime import datetime, timedelta
-import urllib
 
 scraperapi_key = os.getenv('SCRAPER_API')
 
@@ -49,7 +48,6 @@ async def create_jobs():
         await db.disconnect()
 
 @job_blueprint.route('/get', methods=['GET'])
-# Get jobs based on source and title
 async def get_job():
     try:
         if not db.is_connected():
             await db.connect()
@@ -94,21 +92,17 @@ async def get_job():
 
 
 @job_blueprint.route('/get/id', methods=['GET'])
-# Get job by id
 async def getJobId():
     try:
         if not db.is_connected():
             await db.connect()
 
-        jobId = request.args.get('jobId', default=1, type=str)
+        jobId = request.args.get('jobId', default=1, type=int)
 
         # Fetch jobs from the database including the company relation
         job = await db.job.find_unique(
             where={"id": jobId},
-            include={'company': True}
         )
-        if not job:
-            return jsonify({'error': 'Job not found'}), 404
 
         # Serialize the job data
         serialized_job = serialize_job(job)
@@ -116,7 +110,7 @@ async def getJobId():
         return jsonify({'job': serialized_job }), 200
 
     except Exception as e:
-        print(e, "here is the error")
+        print(e, "here is the error")  # Output the error to the console for debugging
         return jsonify({'error': str(e)}), 500
 
     finally:
@@ -124,78 +118,41 @@ async def getJobId():
         await db.disconnect()
 
 @job_blueprint.route('/get/company/list', methods=['GET'])
-# Get companies list
-async def get_companies_list():
+async def get_companies_list(): 
     try:
         if not db.is_connected():
             await db.connect()
 
-        page = request.args.get('page', default=1, type=int)
-        page_size = request.args.get('page_size', default=10, type=int)
-        source = request.args.get('source', default=None, type=str)
-
-        if page < 1:
-            return jsonify({'error': "Page must be a positive number"}), 400
-
-        skip = (page - 1) * page_size
-
-        filter = {}
-        if source:
-            filter['source'] = source
-
-        companies = await db.company.find_many(
-            where=filter,
-            skip=skip,
-            take=page_size
-        )
-
-        total_count = await db.company.count(
-            where={
-                'source': source
-            }
-        )
+        # Fetch jobs from the database including the company relation
+        companies = await db.company.find_many()
 
         serialized_companies = [company.model_dump() for company in companies]
-
-        return jsonify({
-            'companies': serialized_companies,
-            'page': page,
-            'page_size': page_size,
-            'total_count': total_count
-        }), 200
+        return jsonify({'companies': serialized_companies}), 200
 
     except Exception as e:
-        print(f"Error in get_companies_list: {e}")
+        print(e, "here is the error")  # Output the error to the console for debugging
         return jsonify({'error': str(e)}), 500
-
+    finally:
+        # Disconnect Prisma client
        await db.disconnect()
 
 
 @job_blueprint.route('/scrape', methods=['GET'])
-# Scrape job from a given link
 @protect_route()
 async def scrape_job():
     try:
-        if not db.is_connected():
-            await db.connect()
         portal = request.args.get("portal", default='', type=str)
         job_link = request.args.get("job_link", default='', type=str)
         print(job_link, portal, "here is info")
 
-        parsed_url = urllib.parse.urlparse(job_link)
-        pure_link = urllib.parse.urlunparse(parsed_url._replace(query=''))
-
-        existing_job = await db.job.find_first(where={"job_link": pure_link})
-        if existing_job:
-            return jsonify({"message": "Job already exists in database"}), 409
 
-        soup = await scrape_job_link(pure_link, portal)
+        soup = await scrape_job_link(job_link, portal)
 
         jobdata = {}
 
-        # if portal == 'ycombinator':
-        #     print(portal)
-        #     jobdata = await scrape_ycombinator_jobpage(soup, job_link)
+        if portal == 'ycombinator':
+            print(portal)
+            jobdata = await scrape_ycombinator_jobpage(soup, job_link)
 
         # elif portal == 'glassdoor':
         #     print(portal)
@@ -205,9 +162,9 @@ async def scrape_job():
         #     print(portal)
         # #     jobdata = await scrape_indeed(soup)
 
-        # elif portal == 'linkedin':
-        #     print(portal)
-        #     jobdata = await scrape_linkedin_jobpage(soup, job_link)
+        elif portal == 'linkedin':
+            print(portal)
+            jobdata = await scrape_linkedin_jobpage(soup, job_link)
 
         # # elif portal == 'internshala':
         # #     await scrape_internshala(soup)
@@ -230,21 +187,18 @@ async def scrape_job():
     except Exception as e:
         print(e, "here is the error")  # Output the error to the console for debugging
         return jsonify({'error': str(e)}), 500
-    finally:
-        await db.disconnect()
 
 
 @job_blueprint.route('/expire', methods=['GET'])
 # @protect_route()
-# Check job & expire them
 async def expire_jobs():
     try:
         if not db.is_connected():
             await db.connect()
 
-        print("Expiring sudden jobs")
-        # Add logic to expire jobs which are reported by user suddenly.
-        expired_count = await expire_sudden_jobs()
+        print("Running scheduled job expiration")
+        # Run the job expiration process
+        expired_count = await run_job_expiration()
 
         return jsonify({
             'success': True,
@@ -260,35 +214,44 @@ async def expire_jobs():
         await db.disconnect()
 
 @job_blueprint.route('/stats', methods=['GET'])
-# Get job statistics
 async def get_job_stats():
     try:
         if not db.is_connected():
             await db.connect()
+
+        # Get total jobs
+        total_jobs = await db.job.count()
 
-        current_date = datetime.now()
-        thirty_days_ago = current_date - timedelta(days=30)
+        # Get active jobs
+        active_jobs = await db.job.count(
+            where={
+                "status": "active"
+            }
+        )
 
-        total_jobs = await db.job.count()
-        active_jobs = await db.job.count(where={"status": "active"})
+        # Get jobs with end_date in the past
+        current_date = datetime.now()
         expired_end_date = await db.job.count(
-            where={"end_date": {"lte": current_date}, "status": "active"}
+            where={
+                "end_date": {"lte": current_date},
+                "status": "active"
+            }
         )
+
+        # Get jobs older than 30 days
+        thirty_days_ago = current_date - timedelta(days=30)
         old_jobs = await db.job.count(
-            where={"posted": {"lte": thirty_days_ago}, "status": "active"}
+            where={
+                "posted": {"lte": thirty_days_ago},
+                "status": "active"
+            }
         )
-        jobs_by_source = await db.job.group_by(["source"])
-        jobs_by_source_dict = {}
-        for group in jobs_by_source:
-            count = await db.job.count(where={"source": group["source"]})
-            jobs_by_source_dict[group["source"]] = count
 
         return jsonify({
             'total_jobs': total_jobs,
             'active_jobs': active_jobs,
             'jobs_with_expired_end_date': expired_end_date,
             'jobs_older_than_30_days': old_jobs,
-            'jobs_by_source': jobs_by_source_dict,
             'current_time': current_date.isoformat(),
             'thirty_days_ago': thirty_days_ago.isoformat()
         }), 200
diff --git a/function/insert_job.py b/function/insert_job.py
index 363fd7b..9b96b32 100644
--- a/function/insert_job.py
+++ b/function/insert_job.py
@@ -30,20 +30,11 @@ def to_lowercase(value):
                 data={
                     "company_name": company_name,
                     "company_logo": job.get('company_logo'),
-                    "description": job.get('company_desc'),
-                    "source": [job.get('source')] if job.get('source') else []
+                    "description": job.get('company_desc')
                 }
             )
         except UniqueViolationError:
             # Handle race condition where another insert happened
             company = await db.company.find_unique(where={'company_name': company_name})
-    elif job.get('source'):
-        current_sources = company.source or []
-        if job['source'] not in current_sources:
-            updated_sources = current_sources + [job['source']]
-            company = await db.company.update(
-                where={'id': company.id},
-                data={'source': updated_sources}
-            )
 
     # Ensure company exists
     if not company:
diff --git a/function/job_expires/job_expirations.py b/function/job_expires/job_expirations.py
index d20e036..a2ac813 100644
--- a/function/job_expires/job_expirations.py
+++ b/function/job_expires/job_expirations.py
@@ -149,24 +149,3 @@ async def run_job_expiration():
     except Exception as e:
         logger.error(f"Error in run_job_expiration: {str(e)}")
         return 0
-
-
-async def expire_sudden_jobs():
-    """
-    Expire jobs that were reported by user.
-    """
-    try:
-        if not db.is_connected():
-            await db.connect()
-
-        # Add logic to expire jobs which are reported by user suddenly.
-        # By sending a http request to the jobpage
-        hello =0
-
-        return 0
-    except Exception as e:
-        logger.error(f"Error in expire_sudden_jobs: {str(e)}")
-        return 0
-    finally:
-        if db.is_connected():
-            await db.disconnect()
diff --git a/prisma/migrations/20250327092937_/migration.sql b/prisma/migrations/20250327092937_/migration.sql
deleted file mode 100644
index dfe213f..0000000
--- a/prisma/migrations/20250327092937_/migration.sql
+++ /dev/null
@@ -1,2 +0,0 @@
--- DropEnum
-DROP TYPE "JobStatus";
diff --git a/prisma/migrations/20250327093137_added_job_indexes/migration.sql b/prisma/migrations/20250327093137_added_job_indexes/migration.sql
deleted file mode 100644
index e936e85..0000000
--- a/prisma/migrations/20250327093137_added_job_indexes/migration.sql
+++ /dev/null
@@ -1,17 +0,0 @@
--- AlterTable
-ALTER TABLE "Company" ADD COLUMN "source" TEXT[];
-
--- CreateIndex
-CREATE INDEX "Job_title_idx" ON "Job"("title");
-
--- CreateIndex
-CREATE INDEX "Job_job_location_idx" ON "Job"("job_location");
-
--- CreateIndex
-CREATE INDEX "Job_salary_min_idx" ON "Job"("salary_min");
-
--- CreateIndex
-CREATE INDEX "Job_salary_max_idx" ON "Job"("salary_max");
-
--- CreateIndex
-CREATE INDEX "Job_status_idx" ON "Job"("status");
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index ebb5314..d126c41 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -64,7 +64,6 @@ model Company {
   company_logo String?
   socials      Json? // JSON field to store all social media links
   last_job_ids Json? // Added this for storing job Ids
-  source       String[]
   jobs         Job[]
 
   @@unique([company_name], map: "UniqueCompanyNameConstraint")
@@ -97,9 +96,4 @@ model Job {
   trackedJobs Tracked_Jobs[]
 
   @@unique([title, job_id, companyId], map: "UniqueJobTitleCompany")
-  @@index([title]) // Index for title searches
-  @@index([job_location]) // Index for location filters
-  @@index([salary_min]) // Index for salary range filters
-  @@index([salary_max]) // Index for salary range filters
-  @@index([status]) // Index for status filters
 }
diff --git a/utils/serialize_data.py b/utils/serialize_data.py
index a0ab737..63d149e 100644
--- a/utils/serialize_data.py
+++ b/utils/serialize_data.py
@@ -2,26 +2,14 @@ def serialize_job(job):
     return {
         'id': job.id,
         'title': job.title,
-        'job_link': job.job_link,
-        'job_type': job.job_type,
-        'job_id': job.job_id,
-        'job_location': job.job_location,
-        'salary_min': job.salary_min,
-        'salary_max': job.salary_max,
-        'job_salary': job.job_salary,
-        'experience_min': job.experience_min,
-        'experience_max': job.experience_max,
-        'experience': job.experience,
-        'job_description': job.job_description,
-        'skills_required': job.skills_required,
-        'source': job.source,
-        'source_logo': job.source_logo,
-        'posted': job.posted,
-        'end_date': job.end_date,
         'company': {
-            'id': job.company.id,
             'name': job.company.company_name,
             'logo': job.company.company_logo,
+            'description': job.company.description
         },
-        'status': job.status,
+        'job_location': job.job_location,
+        'job_type': job.job_type,
+        'job_salary': job.job_salary,
+        'job_link': job.job_link,
+        'source': job.source
     }