121 changes: 42 additions & 79 deletions controllers/job.py
@@ -6,12 +6,11 @@
import os
from function.utils import scrape_job_link
from function.crawler.job_portals import scrape_ycombinator_jobpage, scrape_linkedin_jobpage
-from function.job_expires.job_expirations import expire_sudden_jobs
+from function.job_expires.job_expirations import run_job_expiration
from utils.functions import checkExistingJob
from middleware import protect_routes
from functools import wraps
from datetime import datetime, timedelta
-import urllib

scraperapi_key = os.getenv('SCRAPER_API')

@@ -49,7 +48,6 @@ async def create_jobs():
await db.disconnect()

@job_blueprint.route('/get', methods=['GET'])
-# Get jobs based on source and title
async def get_job():
try:
if not db.is_connected():
Expand Down Expand Up @@ -94,108 +92,67 @@ async def get_job():


@job_blueprint.route('/get/id', methods=['GET'])
-# Get job by id
async def getJobId():
try:
if not db.is_connected():
await db.connect()

-jobId = request.args.get('jobId', default=1, type=str)
+jobId = request.args.get('jobId', default=1, type=int)

# Fetch jobs from the database including the company relation
job = await db.job.find_unique(
where={"id": jobId},
include={'company': True}
)
if not job:
return jsonify({'error': 'Job not found'}), 404

# Serialize the job data
serialized_job = serialize_job(job)

return jsonify({'job': serialized_job }), 200

except Exception as e:
-print(e, "here is the error")
+print(e, "here is the error") # Output the error to the console for debugging
return jsonify({'error': str(e)}), 500

finally:
# Disconnect Prisma client
await db.disconnect()

@job_blueprint.route('/get/company/list', methods=['GET'])
-# Get companies list
-async def get_companies_list():
+async def get_companies_list():
try:
if not db.is_connected():
await db.connect()

-page = request.args.get('page', default=1, type=int)
-page_size = request.args.get('page_size', default=10, type=int)
-source = request.args.get('source', default=None, type=str)

-if page < 1:
-return jsonify({'error': "Page must be a positive number"}), 400

-skip = (page - 1) * page_size

-filter = {}
-if source:
-filter['source'] = source

-companies = await db.company.find_many(
-where=filter,
-skip=skip,
-take=page_size
-)

-total_count = await db.company.count(
-where={
-'source': source
-}
-)
+# Fetch jobs from the database including the company relation
+companies = await db.company.find_many()

serialized_companies = [company.model_dump() for company in companies]

-return jsonify({
-'companies': serialized_companies,
-'page': page,
-'page_size': page_size,
-'total_count': total_count
-}), 200
+return jsonify({'companies': serialized_companies}), 200

except Exception as e:
print(f"Error in get_companies_list: {e}")
print(e, "here is the error") # Output the error to the console for debugging
return jsonify({'error': str(e)}), 500

finally:
# Disconnect Prisma client
await db.disconnect()
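
[Reviewer note] With the page/page_size/source handling gone, /get/company/list now returns every company in a single response. A quick local check, assuming the blueprint is mounted at /job on a dev server (an assumption, not shown in this diff):

import requests

payload = requests.get("http://localhost:5000/job/get/company/list").json()
print(len(payload["companies"]))  # unbounded now that pagination is removed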


@job_blueprint.route('/scrape', methods=['GET'])
-# Scrape job from a given link
@protect_route()
async def scrape_job():
try:
if not db.is_connected():
await db.connect()
portal = request.args.get("portal", default='', type=str)
job_link = request.args.get("job_link", default='', type=str)

print(job_link, portal, "here is info")
-parsed_url = urllib.parse.urlparse(job_link)
-pure_link = urllib.parse.urlunparse(parsed_url._replace(query=''))

-existing_job = await db.job.find_first(where={"job_link": pure_link})
-if existing_job:
-return jsonify({"message": "Job already exists in database"}), 409

-soup = await scrape_job_link(pure_link, portal)
+soup = await scrape_job_link(job_link, portal)
jobdata = {}

-# if portal == 'ycombinator':
-# print(portal)
-# jobdata = await scrape_ycombinator_jobpage(soup, job_link)
+if portal == 'ycombinator':
+print(portal)
+jobdata = await scrape_ycombinator_jobpage(soup, job_link)

# elif portal == 'glassdoor':
# print(portal)
@@ -205,9 +162,9 @@ async def scrape_job():
# print(portal)
# # jobdata = await scrape_indeed(soup)

-# elif portal == 'linkedin':
-# print(portal)
-# jobdata = await scrape_linkedin_jobpage(soup, job_link)
+elif portal == 'linkedin':
+print(portal)
+jobdata = await scrape_linkedin_jobpage(soup, job_link)

# # elif portal == 'internshala':
# # await scrape_internshala(soup)
@@ -230,21 +187,18 @@ async def scrape_job():
except Exception as e:
print(e, "here is the error") # Output the error to the console for debugging
return jsonify({'error': str(e)}), 500
-finally:
-await db.disconnect()

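[Reviewer note] The hunks above re-enable the Y Combinator and LinkedIn scrapers and drop the pure-link duplicate check. A minimal sketch of exercising the endpoint, assuming a /job mount path and that @protect_route() accepts a bearer token (both assumptions, not confirmed by this diff):

import requests

resp = requests.get(
    "http://localhost:5000/job/scrape",
    params={
        "portal": "ycombinator",
        "job_link": "https://www.workatastartup.com/jobs/example",  # hypothetical link
    },
    headers={"Authorization": "Bearer <token>"},  # whatever protect_route() expects
)
print(resp.status_code, resp.json())
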
@job_blueprint.route('/expire', methods=['GET'])
# @protect_route()
# Check job & expire them
async def expire_jobs():
try:

if not db.is_connected():
await db.connect()

print("Expiring sudden jobs")
# Add logic to expire jobs which are reported by user suddenly.
expired_count = await expire_sudden_jobs()
print("Running scheduled job expiration")
# Run the job expiration process
expired_count = await run_job_expiration()

return jsonify({
'success': True,
@@ -260,35 +214,44 @@ async def expire_jobs():
await db.disconnect()

@job_blueprint.route('/stats', methods=['GET'])
-# Get job statistics
async def get_job_stats():
try:
if not db.is_connected():
await db.connect()

+# Get total jobs
+total_jobs = await db.job.count()

-current_date = datetime.now()
-thirty_days_ago = current_date - timedelta(days=30)
+# Get active jobs
+active_jobs = await db.job.count(
+where={
+"status": "active"
+}
+)

-total_jobs = await db.job.count()
-active_jobs = await db.job.count(where={"status": "active"})
+# Get jobs with end_date in the past
+current_date = datetime.now()
expired_end_date = await db.job.count(
where={"end_date": {"lte": current_date}, "status": "active"}
where={
"end_date": {"lte": current_date},
"status": "active"
}
)

+# Get jobs older than 30 days
+thirty_days_ago = current_date - timedelta(days=30)
old_jobs = await db.job.count(
where={"posted": {"lte": thirty_days_ago}, "status": "active"}
where={
"posted": {"lte": thirty_days_ago},
"status": "active"
}
)

jobs_by_source = await db.job.group_by(["source"])
jobs_by_source_dict = {}
for group in jobs_by_source:
count = await db.job.count(where={"source": group["source"]})
jobs_by_source_dict[group["source"]] = count
return jsonify({
'total_jobs': total_jobs,
'active_jobs': active_jobs,
'jobs_with_expired_end_date': expired_end_date,
'jobs_older_than_30_days': old_jobs,
'jobs_by_source': jobs_by_source_dict,
'current_time': current_date.isoformat(),
'thirty_days_ago': thirty_days_ago.isoformat()
}), 200
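
[Reviewer note] The reinstated stats code still issues one count() per source inside the group_by loop, i.e. an N+1 query pattern that may be worth revisiting. A quick smoke test, assuming the /job mount path (an assumption):

import requests

stats = requests.get("http://localhost:5000/job/stats").json()
print(stats["total_jobs"], stats["active_jobs"])
print(stats["jobs_by_source"])  # e.g. {"ycombinator": 12, "linkedin": 30}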
11 changes: 1 addition & 10 deletions function/insert_job.py
@@ -30,20 +30,11 @@ def to_lowercase(value):
data={
"company_name": company_name,
"company_logo": job.get('company_logo'),
"description": job.get('company_desc'),
"source": [job.get('source')] if job.get('source') else []
"description": job.get('company_desc')
}
)
except UniqueViolationError: # Handle race condition where another insert happened
company = await db.company.find_unique(where={'company_name': company_name})
-elif job.get('source'):
-current_sources = company.source or []
-if job['source'] not in current_sources:
-updated_sources = current_sources + [job['source']]
-company = await db.company.update(
-where={'id': company.id},
-data={'source': updated_sources}
-)

# Ensure company exists
if not company:
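[Reviewer note] The surviving create-then-catch-UniqueViolationError flow guards against two requests inserting the same company concurrently. A self-contained sketch of that pattern, assuming the prisma-client-py client used in this repo (the helper name is hypothetical):

from prisma import Prisma
from prisma.errors import UniqueViolationError

async def get_or_create_company(db: Prisma, company_name: str, **fields):
    company = await db.company.find_unique(where={"company_name": company_name})
    if company is None:
        try:
            company = await db.company.create(
                data={"company_name": company_name, **fields}
            )
        except UniqueViolationError:
            # Another request won the race; read the row it created.
            company = await db.company.find_unique(
                where={"company_name": company_name}
            )
    return company
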
21 changes: 0 additions & 21 deletions function/job_expires/job_expirations.py
@@ -149,24 +149,3 @@ async def run_job_expiration():
except Exception as e:
logger.error(f"Error in run_job_expiration: {str(e)}")
return 0


-async def expire_sudden_jobs():
-"""
-Expire jobs that were reported by user.
-"""
-try:
-if not db.is_connected():
-await db.connect()

-# Add logic to expire jobs which are reported by user suddenly.
-# By sending a http request to the jobpage
-hello =0

-return 0
-except Exception as e:
-logger.error(f"Error in expire_sudden_jobs: {str(e)}")
-return 0
-finally:
-if db.is_connected():
-await db.disconnect()
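
[Reviewer note] Only the tail of run_job_expiration is visible in this diff. For reference, a hedged sketch of what a consolidated expiration pass could look like, using the Job fields the /stats endpoint queries (status, end_date, posted); the "expired" status value and the signature are assumptions, not the actual implementation:

from datetime import datetime, timedelta
from prisma import Prisma

async def run_job_expiration(db: Prisma) -> int:
    now = datetime.now()
    cutoff = now - timedelta(days=30)
    # Expire active jobs whose end_date has passed or that were posted 30+ days ago.
    return await db.job.update_many(
        where={
            "status": "active",
            "OR": [
                {"end_date": {"lte": now}},
                {"posted": {"lte": cutoff}},
            ],
        },
        data={"status": "expired"},  # assumed status value
    )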
2 changes: 0 additions & 2 deletions prisma/migrations/20250327092937_/migration.sql

This file was deleted.

17 changes: 0 additions & 17 deletions prisma/migrations/20250327093137_added_job_indexes/migration.sql

This file was deleted.

6 changes: 0 additions & 6 deletions prisma/schema.prisma
@@ -64,7 +64,6 @@ model Company {
company_logo String?
socials Json? // JSON field to store all social media links
last_job_ids Json? // Added this for storing job Ids
-source String[]
jobs Job[]

@@unique([company_name], map: "UniqueCompanyNameConstraint")
@@ -97,9 +96,4 @@ model Job {
trackedJobs Tracked_Jobs[]

@@unique([title, job_id, companyId], map: "UniqueJobTitleCompany")
-@@index([title]) // Index for title searches
-@@index([job_location]) // Index for location filters
-@@index([salary_min]) // Index for salary range filters
-@@index([salary_max]) // Index for salary range filters
-@@index([status]) // Index for status filters
}
24 changes: 6 additions & 18 deletions utils/serialize_data.py
@@ -2,26 +2,14 @@ def serialize_job(job):
return {
'id': job.id,
'title': job.title,
-'job_link': job.job_link,
-'job_type': job.job_type,
-'job_id': job.job_id,
-'job_location': job.job_location,
-'salary_min': job.salary_min,
-'salary_max': job.salary_max,
-'job_salary': job.job_salary,
-'experience_min': job.experience_min,
-'experience_max': job.experience_max,
-'experience': job.experience,
'job_description': job.job_description,
'skills_required': job.skills_required,
-'source': job.source,
-'source_logo': job.source_logo,
'posted': job.posted,
'end_date': job.end_date,
-'company': {
-'id': job.company.id,
-'name': job.company.company_name,
-'logo': job.company.company_logo,
-'description': job.company.description
-},
'status': job.status,
+'job_location': job.job_location,
+'job_type': job.job_type,
+'job_salary': job.job_salary,
+'job_link': job.job_link,
+'source': job.source
}
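
[Reviewer note] serialize_job now returns a flat payload without the nested company object, even though /get/id still queries with include={'company': True}. An illustrative result (values invented; only the key set comes from the code above):

serialized = {
    "id": 1,
    "title": "Backend Engineer",
    "job_description": "...",
    "skills_required": ["python", "flask"],
    "posted": "2025-03-01T00:00:00",
    "end_date": "2025-04-01T00:00:00",
    "status": "active",
    "job_location": "Remote",
    "job_type": "Full-time",
    "job_salary": "$100k-$130k",
    "job_link": "https://example.com/jobs/1",
    "source": "ycombinator",
}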