Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ jobs:
- name: Install Dependencies
run: yarn install --frozen-lockfile
- name: Build Project
run: yarn run build
run: yarn cibuild
2 changes: 1 addition & 1 deletion .github/workflows/push-artifacts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Install Dependencies
run: yarn install --frozen-lockfile
- name: Build Project
run: yarn build
run: yarn cibuild
- name: Deploy to S3
run: |
aws s3 sync ./dist s3://${{ secrets.AWS_S3_BUCKET }} --delete
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ dist-ssr
*.njsproj
*.sln
*.sw?

public/data/rolling-images.json
21 changes: 4 additions & 17 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,11 @@
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"dev": "scripts/prepare-image-links.sh && vite",
"build": "scripts/prepare-image-links.sh && tsc -b && vite build",
"cibuild": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview --host",
"deploy:dev": "./deploy.sh --dev",
"deploy:prod": "./deploy.sh --prod",
"deploy:dev:down": "./deploy.sh --dev --down",
"deploy:prod:down": "./deploy.sh --prod --down",
"deploy:dev:logs": "./deploy.sh --dev --logs",
"deploy:prod:logs": "./deploy.sh --prod --logs",
"deploy:build-push": "./deploy.sh --build-push",
"scheduler:start": "./manage-scheduler.sh start",
"scheduler:stop": "./manage-scheduler.sh stop",
"scheduler:status": "./manage-scheduler.sh status",
"scheduler:logs": "./manage-scheduler.sh logs",
"scheduler:logs-live": "./manage-scheduler.sh logs-live",
"scheduler:run-now": "./manage-scheduler.sh run-now",
"scheduler:cleanup": "./manage-scheduler.sh cleanup"
"preview": "vite preview --host"
},
"dependencies": {
"@fullpage/react-fullpage": "^0.1.48",
Expand Down
22 changes: 0 additions & 22 deletions public/data/rolling-images.json

This file was deleted.

2 changes: 2 additions & 0 deletions scripts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.venv
__pycache__/
1 change: 1 addition & 0 deletions scripts/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
3 changes: 3 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Pexels Image Fetcher

A Python script that fetches the links of all my featured photos on Pexels.
156 changes: 156 additions & 0 deletions scripts/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from fileinput import filename
Copy link

Copilot AI Oct 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fileinput module import is unused and should be removed. The filename identifier from this import is not used anywhere in the code.

Suggested change
from fileinput import filename

Copilot uses AI. Check for mistakes.
import sys
import time
import json
from typing import List
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from chromedriver_autoinstaller import install as install_chromedriver

def find_load_more_button(driver):
    """
    Find the visible "Load More" button on the page currently loaded in *driver*.

    Every ``<button>`` element is inspected: its text (including the text of
    nested elements) is collected and lower-cased in the browser, then searched
    for the phrase "load more".

    Args:
        driver: Selenium WebDriver instance with the target page loaded.

    Returns:
        The first displayed button whose text contains "load more", or None
        when no such button exists or the lookup fails.
    """
    # Runs in the browser: gathers the element's own text plus the text of
    # every descendant, normalised to lower case for the substring check.
    text_script = """
        function getTextContent(element) {
            // Get text from the element itself
            let text = element.textContent || element.innerText || '';

            // Also check all child elements for text
            const children = element.querySelectorAll('*');
            for (let child of children) {
                if (child.textContent) {
                    text += ' ' + child.textContent;
                }
            }

            return text.trim().toLowerCase();
        }
        return getTextContent(arguments[0]);
    """

    try:
        buttons = driver.find_elements(By.TAG_NAME, "button")

        for button in buttons:
            try:
                # The visibility check lives inside the try as well: a button
                # that went stale since find_elements() should be skipped,
                # not abort the whole search.
                if not button.is_displayed():
                    continue
                button_text = driver.execute_script(text_script, button)
            except StaleElementReferenceException:
                continue

            if button_text and "load more" in button_text:
                return button

        return None

    except Exception as e:
        # Best-effort lookup: report the problem and behave as "no button".
        print(f"Error finding load more button: {e}")
        return None

def get_image_links_selenium(url) -> List[str]:
    """
    Crawl a Pexels page with headless Chrome and collect its image links.

    The "Load More" button is clicked (at most a fixed number of times) so
    that additional images are added to the page, then the rendered HTML is
    parsed and every ``<img>`` whose ``src`` starts with
    ``https://images.pexels.com/photos/`` is collected with its query string
    removed.

    Args:
        url: Address of the Pexels page to crawl.

    Returns:
        De-duplicated image URLs in the order they appear on the page, or an
        empty list if an error occurs while crawling.
    """
    driver = None  # Lets the finally block tell whether Chrome was started
    try:
        # Automatically install and set up chromedriver
        service = Service(install_chromedriver())

        # Set up Chrome options for headless mode
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36')

        driver = webdriver.Chrome(service=service, options=options)

        print("Fetching URL with Selenium...")
        driver.get(url)
        # Wait for initial page load
        time.sleep(3)

        max_clicks = 5  # Safety limit to prevent infinite loops
        click_count = 0

        # Each iteration is one attempt, so the loop is bounded even when
        # every attempt fails.
        for _ in range(max_clicks):
            load_more_button = find_load_more_button(driver)
            try:
                if load_more_button is None or not load_more_button.is_displayed():
                    print("No Load More button left to click.")
                    break

                click_count += 1
                print(f"Found Load More button, clicking... (attempt {click_count})")
                load_more_button.click()
            except Exception as e:
                # Covers stale elements and intercepted clicks: report the
                # failure and look the button up again on the next attempt.
                print(f"Error clicking button: {e}")
                continue

            # Give the page time to append the next batch before searching
            # for the button again.
            time.sleep(2)

        print(f"Finished clicking Load More buttons. Total clicks: {click_count}")

        # Wait a bit more to ensure all content is loaded
        time.sleep(2)

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        img_links: List[str] = []
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if isinstance(src, str) and src.startswith('https://images.pexels.com/photos/'):
                # Drop the query string so size variants of the same photo
                # collapse into one link.
                img_links.append(src.split('?')[0])

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(img_links))

    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    finally:
        if driver:
            driver.quit()

def save_links_to_json(links: List[str], filename: str) -> None:
    """
    Save the list of image links to a JSON file.

    The file contains a single object of the form ``{"images": [...]}``.
    Missing parent directories of *filename* are created first. Failures are
    reported on stdout rather than raised.

    Args:
        links: Image URLs to store.
        filename: Path of the JSON file to write (overwritten if it exists).
    """
    import os

    try:
        # The target directory may not exist yet (for example when the output
        # file is the only thing that would live in it), so create it first.
        parent = os.path.dirname(filename)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump({"images": links}, f, indent=4)
        print(f"Image links saved to {filename}")
    except (OSError, TypeError, ValueError) as e:
        print(f"Error saving links to JSON: {e}")

if __name__ == "__main__":
    # Script entry point: crawl the featured-uploads page, print what was
    # found, and optionally persist it.
    pexels_url = "https://www.pexels.com/@perry-z-1662054943/featured-uploads/"
    links = get_image_links_selenium(pexels_url)

    if not links:
        print("\nNo image links were found.")
    else:
        print("\nFound the following image links:")
        print(json.dumps(links, indent=4))
        print(f"\nTotal image links found: {len(links)}")
        # Saving is optional: it only happens when an output path is passed
        # as the first command-line argument.
        if len(sys.argv) > 1:
            output_filename = sys.argv[1]
            save_links_to_json(links, output_filename)
8 changes: 8 additions & 0 deletions scripts/prepare-image-links.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Generates public/data/rolling-images.json with the Python fetcher in
# scripts/, unless the file is already present.
# All paths are relative to the current directory, so this is expected to be
# run from the repository root.

if [ -f "public/data/rolling-images.json" ]; then
    echo "rolling-images.json already exists, skipping..."
    exit 0
fi

# The JSON file is git-ignored, so its directory may be missing on a fresh
# checkout; make sure it exists before the fetcher writes into it.
mkdir -p public/data || exit 1

cd scripts || exit 1

# Stop here if the environment cannot be prepared instead of running the
# fetcher against a broken environment.
uv sync || exit 1
uv run python main.py "../public/data/rolling-images.json"
11 changes: 11 additions & 0 deletions scripts/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[project]
name = "scripts"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"bs4>=0.0.2",
"chromedriver-autoinstaller>=0.6.4",
"selenium>=4.36.0",
]
Loading