From f2f8ea9f9110d4711d8c7f152d95f3a583c91559 Mon Sep 17 00:00:00 2001 From: Perry Zhu Date: Sun, 12 Oct 2025 15:48:54 -0700 Subject: [PATCH 1/5] feat: script to fetch image links --- scripts/.gitignore | 2 + scripts/.python-version | 1 + scripts/README.md | 0 scripts/main.py | 160 ++++++++++++++++++++++++ scripts/pyproject.toml | 11 ++ scripts/test.json | 26 ++++ scripts/uv.lock | 271 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 471 insertions(+) create mode 100644 scripts/.gitignore create mode 100644 scripts/.python-version create mode 100644 scripts/README.md create mode 100644 scripts/main.py create mode 100644 scripts/pyproject.toml create mode 100644 scripts/test.json create mode 100644 scripts/uv.lock diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 0000000..7cd6f5d --- /dev/null +++ b/scripts/.gitignore @@ -0,0 +1,2 @@ +.venv +__pycache__/ \ No newline at end of file diff --git a/scripts/.python-version b/scripts/.python-version new file mode 100644 index 0000000..24ee5b1 --- /dev/null +++ b/scripts/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..e69de29 diff --git a/scripts/main.py b/scripts/main.py new file mode 100644 index 0000000..9e5f282 --- /dev/null +++ b/scripts/main.py @@ -0,0 +1,160 @@ +from fileinput import filename +import sys +import time +import json +from typing import List +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from selenium.common.exceptions import StaleElementReferenceException +from chromedriver_autoinstaller import install as install_chromedriver + +def find_load_more_button(driver): + """ + Find the Load More button by searching through all buttons and examining their text content. + Returns the button element if found, None otherwise. + """ + try: + # Find all buttons on the page + buttons = driver.find_elements(By.TAG_NAME, "button") + + for button in buttons: + if button and button.is_displayed(): + try: + # Get the text content of the button including all nested elements + button_text = driver.execute_script(""" + function getTextContent(element) { + // Get text from the element itself + let text = element.textContent || element.innerText || ''; + + // Also check all child elements for text + const children = element.querySelectorAll('*'); + for (let child of children) { + if (child.textContent) { + text += ' ' + child.textContent; + } + } + + return text.trim().toLowerCase(); + } + return getTextContent(arguments[0]); + """, button) + + # Check if the button contains "load more" text + if button_text and "load more" in button_text: + return button + + except StaleElementReferenceException: + continue + + return None + + except Exception as e: + print(f"Error finding load more button: {e}") + return None + +def get_image_links_selenium(url): + """ + Crawls a Pexels page using Selenium to fetch the links of all featured images. + Auto-clicks the "Load More" button if exists to load more images. + """ + driver = None # Initialize driver to None + try: + # Automatically install and set up chromedriver + service = Service(install_chromedriver()) + + # Set up Chrome options for headless mode + options = webdriver.ChromeOptions() + options.add_argument('--headless') + options.add_argument('--disable-gpu') + options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36') + + driver = webdriver.Chrome(service=service, options=options) + + print("Fetching URL with Selenium...") + driver.get(url) + # Wait for initial page load + time.sleep(3) + + max_clicks = 5 # Safety limit to prevent infinite loops + click_count = 0 + + # Try clicking the Load More button + while click_count < max_clicks: + load_more_button = find_load_more_button(driver) + + if load_more_button and load_more_button.is_displayed(): + try: + # Click the button + print(f"Found Load More button, clicking... (attempt {click_count + 1})") + click_count += 1 + load_more_button.click() + + # # Wait for new content to load + # print("Waiting for new content to load...") + # time.sleep(3) + + except (StaleElementReferenceException, Exception) as e: + # print(f"Error clicking button: {e}") + continue + else: + print("No Load More button left to click.") + break + + print(f"Finished clicking Load More buttons. Total clicks: {click_count}") + + # Wait a bit more to ensure all content is loaded + time.sleep(2) + + html = driver.page_source + soup = BeautifulSoup(html, 'html.parser') + + imgs = soup.find_all('img') + img_links: List[str] = [] + for img in imgs: + src = img.get('src', '') + if isinstance(src, str) and src.startswith('https://images.pexels.com/photos/'): + img_links.append(src) + + processed_img_links: List[str] = [] + for link in img_links: + processed_link = link.split('?')[0] + if processed_link not in processed_img_links: + processed_img_links.append(processed_link) + + return processed_img_links + + except Exception as e: + print(f"An error occurred: {e}") + return [] + finally: + if driver: + driver.quit() + +def save_links_to_json(links: List[str], filename: str): + """ + Save the list of image links to a JSON file. + """ + try: + with open(filename, 'w') as f: + json.dump({ + "images": links + }, f, indent=4) + print(f"Image links saved to {filename}") + except Exception as e: + print(f"Error saving links to JSON: {e}") + +if __name__ == "__main__": + pexels_url = "https://www.pexels.com/@perry-z-1662054943/featured-uploads/" + links = get_image_links_selenium(pexels_url) + + if links: + print("\nFound the following image links:") + print(json.dumps(links, indent=4)) + print(f"\nTotal image links found: {len(links)}") + if sys.argv and len(sys.argv) > 1: + output_filename = sys.argv[1] + save_links_to_json(links, output_filename) + else: + print("\nNo image links were found.") diff --git a/scripts/pyproject.toml b/scripts/pyproject.toml new file mode 100644 index 0000000..9be6561 --- /dev/null +++ b/scripts/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "scripts" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "bs4>=0.0.2", + "chromedriver-autoinstaller>=0.6.4", + "selenium>=4.36.0", +] diff --git a/scripts/test.json b/scripts/test.json new file mode 100644 index 0000000..c094def --- /dev/null +++ b/scripts/test.json @@ -0,0 +1,26 @@ +{ + "images": [ + "https://images.pexels.com/photos/33698467/pexels-photo-33698467/free-photo-of-vancouver-celebration-of-light-fireworks-at-sunset.jpeg", + "https://images.pexels.com/photos/32825663/pexels-photo-32825663/free-photo-of-burrard-bridge-overlooking-vancouver-marina.jpeg", + "https://images.pexels.com/photos/31503261/pexels-photo-31503261/free-photo-of-cherry-blossom-branches-against-blue-sky.jpeg", + "https://images.pexels.com/photos/31503258/pexels-photo-31503258/free-photo-of-close-up-of-blooming-cherry-blossoms-in-spring.jpeg", + "https://images.pexels.com/photos/30565221/pexels-photo-30565221/free-photo-of-snow-covered-roses-in-winter-scene.jpeg", + "https://images.pexels.com/photos/30565220/pexels-photo-30565220/free-photo-of-snow-covered-bench-in-a-winter-park.jpeg", + "https://images.pexels.com/photos/30422637/pexels-photo-30422637/free-photo-of-cluster-of-green-cacti-in-a-desert-setting.jpeg", + "https://images.pexels.com/photos/30422636/pexels-photo-30422636/free-photo-of-serene-bonsai-tree-in-natural-outdoor-setting.jpeg", + "https://images.pexels.com/photos/28797085/pexels-photo-28797085/free-photo-of-vibrant-autumn-foliage-in-a-dense-forest-setting.jpeg", + "https://images.pexels.com/photos/28797082/pexels-photo-28797082/free-photo-of-serene-forest-waterfall-in-lush-greenery.jpeg", + "https://images.pexels.com/photos/28797079/pexels-photo-28797079/free-photo-of-vibrant-autumn-japanese-maple-leaves-in-nature.jpeg", + "https://images.pexels.com/photos/28797071/pexels-photo-28797071/free-photo-of-modern-building-with-autumn-ivy-facade.jpeg", + "https://images.pexels.com/photos/32825665/pexels-photo-32825665/free-photo-of-modern-vancouver-skyscrapers-against-blue-sky.jpeg", + "https://images.pexels.com/photos/32784315/pexels-photo-32784315/free-photo-of-vibrant-yellow-peruvian-lily-in-lush-greenery.jpeg", + "https://images.pexels.com/photos/30565224/pexels-photo-30565224/free-photo-of-winter-wonderland-with-environmental-message.jpeg", + "https://images.pexels.com/photos/30422639/pexels-photo-30422639/free-photo-of-vibrant-pink-flower-in-sunlit-garden.jpeg", + "https://images.pexels.com/photos/28797087/pexels-photo-28797087/free-photo-of-autumn-maple-leaves-illuminated-by-street-lamp.jpeg", + "https://images.pexels.com/photos/28797083/pexels-photo-28797083/free-photo-of-vibrant-green-leaves-in-tranquil-forest-setting.jpeg", + "https://images.pexels.com/photos/28797080/pexels-photo-28797080/free-photo-of-vibrant-green-and-red-maple-leaves-in-autumn.jpeg", + "https://images.pexels.com/photos/28797076/pexels-photo-28797076/free-photo-of-gray-squirrel-eating-on-green-grass.jpeg", + "https://images.pexels.com/photos/28707412/pexels-photo-28707412/free-photo-of-seagull-eating-pastry-on-green-grass-lawn.jpeg", + "https://images.pexels.com/photos/28687366/pexels-photo-28687366/free-photo-of-fresh-seafood-display-at-local-market.jpeg" + ] +} \ No newline at end of file diff --git a/scripts/uv.lock b/scripts/uv.lock new file mode 100644 index 0000000..6fe2268 --- /dev/null +++ b/scripts/uv.lock @@ -0,0 +1,271 @@ +version = 1 +revision = 3 +requires-python = ">=3.13" + +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822, upload-time = "2025-09-29T10:05:42.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, +] + +[[package]] +name = "bs4" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698, upload-time = "2024-01-17T18:15:47.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189, upload-time = "2024-01-17T18:15:48.613Z" }, +] + +[[package]] +name = "certifi" +version = "2025.10.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "chromedriver-autoinstaller" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/5a/9fc60c65673444d592b8922316c3abcd6177b42208c5a6179f96ccf0e11b/chromedriver-autoinstaller-0.6.4.tar.gz", hash = "sha256:1b4df04b87e6107c730085b98e5fd541db3d1777c32b8bd08e2ca4b1244050af", size = 6944, upload-time = "2024-01-28T15:30:22.385Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/b5/36f0b0add145c371b5282e881a687601899f2d27fae5d0595bc02026b67c/chromedriver_autoinstaller-0.6.4-py3-none-any.whl", hash = "sha256:b12ed187ca9fac4d744deb588d221222ed50836384607e5303e6eab98bb9dc64", size = 7634, upload-time = "2024-01-28T15:30:20.234Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "outcome" +version = "1.3.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060, upload-time = "2023-10-26T04:26:04.361Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692, upload-time = "2023-10-26T04:26:02.532Z" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pycparser" +version = "2.23" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, +] + +[[package]] +name = "pysocks" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429, upload-time = "2019-09-20T02:07:35.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" }, +] + +[[package]] +name = "scripts" +version = "0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "bs4" }, + { name = "chromedriver-autoinstaller" }, + { name = "selenium" }, +] + +[package.metadata] +requires-dist = [ + { name = "bs4", specifier = ">=0.0.2" }, + { name = "chromedriver-autoinstaller", specifier = ">=0.6.4" }, + { name = "selenium", specifier = ">=4.36.0" }, +] + +[[package]] +name = "selenium" +version = "4.36.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "trio" }, + { name = "trio-websocket" }, + { name = "typing-extensions" }, + { name = "urllib3", extra = ["socks"] }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/35/33d3d84e3399c9d00b489aeccfdc78115e149e45816fb8fe84274329e8a2/selenium-4.36.0.tar.gz", hash = "sha256:0eced83038736c3a013b824116df0b6dbb83e93721545f51b680451013416723", size = 913613, upload-time = "2025-10-02T15:24:37.483Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/9e/642a355e43a4ebf68bc4f00dd4ab264f635079c5dc7ed6d9991a0c2be3d7/selenium-4.36.0-py3-none-any.whl", hash = "sha256:525fdfe96b99c27d9a2c773c75aa7413f4c24bdb7b9749c1950aa3b5f79ed915", size = 9587029, upload-time = "2025-10-02T15:24:35.025Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, +] + +[[package]] +name = "trio" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" }, + { name = "idna" }, + { name = "outcome" }, + { name = "sniffio" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/8f/c6e36dd11201e2a565977d8b13f0b027ba4593c1a80bed5185489178e257/trio-0.31.0.tar.gz", hash = "sha256:f71d551ccaa79d0cb73017a33ef3264fde8335728eb4c6391451fe5d253a9d5b", size = 605825, upload-time = "2025-09-09T15:17:15.242Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/5b/94237a3485620dbff9741df02ff6d8acaa5fdec67d81ab3f62e4d8511bf7/trio-0.31.0-py3-none-any.whl", hash = "sha256:b5d14cd6293d79298b49c3485ffd9c07e3ce03a6da8c7dfbe0cb3dd7dc9a4774", size = 512679, upload-time = "2025-09-09T15:17:13.821Z" }, +] + +[[package]] +name = "trio-websocket" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "outcome" }, + { name = "trio" }, + { name = "wsproto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/3c/8b4358e81f2f2cfe71b66a267f023a91db20a817b9425dd964873796980a/trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae", size = 33549, upload-time = "2025-02-25T05:16:58.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/19/eb640a397bba49ba49ef9dbe2e7e5c04202ba045b6ce2ec36e9cadc51e04/trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6", size = 21221, upload-time = "2025-02-25T05:16:57.545Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[package.optional-dependencies] +socks = [ + { name = "pysocks" }, +] + +[[package]] +name = "websocket-client" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/41/aa4bf9664e4cda14c3b39865b12251e8e7d239f4cd0e3cc1b6c2ccde25c1/websocket_client-1.9.0.tar.gz", hash = "sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98", size = 70576, upload-time = "2025-10-07T21:16:36.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, +] + +[[package]] +name = "wsproto" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425, upload-time = "2022-08-23T19:58:21.447Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226, upload-time = "2022-08-23T19:58:19.96Z" }, +] From 501055ddeb43ccd4886628c65ed12b1c22087c13 Mon Sep 17 00:00:00 2001 From: Perry Zhu Date: Sun, 12 Oct 2025 16:06:16 -0700 Subject: [PATCH 2/5] feat: fetch links on build --- .gitignore | 2 ++ package.json | 20 +++----------------- public/data/rolling-images.json | 22 ---------------------- scripts/prepare-image-links.sh | 8 ++++++++ 4 files changed, 13 insertions(+), 39 deletions(-) delete mode 100644 public/data/rolling-images.json create mode 100755 scripts/prepare-image-links.sh diff --git a/.gitignore b/.gitignore index a547bf3..19ceb5e 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ dist-ssr *.njsproj *.sln *.sw? + +public/data/rolling-images.json \ No newline at end of file diff --git a/package.json b/package.json index 43443a6..49afc28 100644 --- a/package.json +++ b/package.json @@ -4,24 +4,10 @@ "version": "0.0.0", "type": "module", "scripts": { - "dev": "vite", - "build": "tsc -b && vite build", + "dev": "scripts/prepare-image-links.sh && vite", + "build": "scripts/prepare-image-links.sh && tsc -b && vite build", "lint": "eslint .", - "preview": "vite preview --host", - "deploy:dev": "./deploy.sh --dev", - "deploy:prod": "./deploy.sh --prod", - "deploy:dev:down": "./deploy.sh --dev --down", - "deploy:prod:down": "./deploy.sh --prod --down", - "deploy:dev:logs": "./deploy.sh --dev --logs", - "deploy:prod:logs": "./deploy.sh --prod --logs", - "deploy:build-push": "./deploy.sh --build-push", - "scheduler:start": "./manage-scheduler.sh start", - "scheduler:stop": "./manage-scheduler.sh stop", - "scheduler:status": "./manage-scheduler.sh status", - "scheduler:logs": "./manage-scheduler.sh logs", - "scheduler:logs-live": "./manage-scheduler.sh logs-live", - "scheduler:run-now": "./manage-scheduler.sh run-now", - "scheduler:cleanup": "./manage-scheduler.sh cleanup" + "preview": "vite preview --host" }, "dependencies": { "@fullpage/react-fullpage": "^0.1.48", diff --git a/public/data/rolling-images.json b/public/data/rolling-images.json deleted file mode 100644 index e2a90b5..0000000 --- a/public/data/rolling-images.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "images": [ - "https://images.pexels.com/photos/31503261/pexels-photo-31503261/free-photo-of-cherry-blossom-branches-against-blue-sky.jpeg", - "https://images.pexels.com/photos/30565224/pexels-photo-30565224/free-photo-of-winter-wonderland-with-environmental-message.jpeg", - "https://images.pexels.com/photos/30422637/pexels-photo-30422637/free-photo-of-cluster-of-green-cacti-in-a-desert-setting.jpeg", - "https://images.pexels.com/photos/30422636/pexels-photo-30422636/free-photo-of-serene-bonsai-tree-in-natural-outdoor-setting.jpeg", - "https://images.pexels.com/photos/28797087/pexels-photo-28797087/free-photo-of-autumn-maple-leaves-illuminated-by-street-lamp.jpeg", - "https://images.pexels.com/photos/28797083/pexels-photo-28797083/free-photo-of-vibrant-green-leaves-in-tranquil-forest-setting.jpeg", - "https://images.pexels.com/photos/28797080/pexels-photo-28797080/free-photo-of-vibrant-green-and-red-maple-leaves-in-autumn.jpeg", - "https://images.pexels.com/photos/28797076/pexels-photo-28797076/free-photo-of-gray-squirrel-eating-on-green-grass.jpeg", - "https://images.pexels.com/photos/28707412/pexels-photo-28707412/free-photo-of-seagull-eating-pastry-on-green-grass-lawn.jpeg", - "https://images.pexels.com/photos/28687366/pexels-photo-28687366/free-photo-of-fresh-seafood-display-at-local-market.jpeg", - "https://images.pexels.com/photos/31503258/pexels-photo-31503258/free-photo-of-close-up-of-blooming-cherry-blossoms-in-spring.jpeg", - "https://images.pexels.com/photos/30565221/pexels-photo-30565221/free-photo-of-snow-covered-roses-in-winter-scene.jpeg", - "https://images.pexels.com/photos/30565220/pexels-photo-30565220/free-photo-of-snow-covered-bench-in-a-winter-park.jpeg", - "https://images.pexels.com/photos/30422639/pexels-photo-30422639/free-photo-of-vibrant-pink-flower-in-sunlit-garden.jpeg", - "https://images.pexels.com/photos/28797085/pexels-photo-28797085/free-photo-of-vibrant-autumn-foliage-in-a-dense-forest-setting.jpeg", - "https://images.pexels.com/photos/28797082/pexels-photo-28797082/free-photo-of-serene-forest-waterfall-in-lush-greenery.jpeg", - "https://images.pexels.com/photos/28797079/pexels-photo-28797079/free-photo-of-vibrant-autumn-japanese-maple-leaves-in-nature.jpeg", - "https://images.pexels.com/photos/28797071/pexels-photo-28797071/free-photo-of-modern-building-with-autumn-ivy-facade.jpeg" - ] -} \ No newline at end of file diff --git a/scripts/prepare-image-links.sh b/scripts/prepare-image-links.sh new file mode 100755 index 0000000..93307b0 --- /dev/null +++ b/scripts/prepare-image-links.sh @@ -0,0 +1,8 @@ +#!/bin/bash +if [ -f "public/data/rolling-images.json" ]; then + echo "rolling-images.json already exists, skipping..." + exit 0 +fi +cd scripts || exit +uv sync +uv run python main.py "../public/data/rolling-images.json" From 29a38faebe090e8912216f3fcfb3635dcc70d4d4 Mon Sep 17 00:00:00 2001 From: Perry Zhu Date: Sun, 12 Oct 2025 16:10:20 -0700 Subject: [PATCH 3/5] chore: readme and cleanup on scripts --- scripts/README.md | 3 +++ scripts/test.json | 26 -------------------------- 2 files changed, 3 insertions(+), 26 deletions(-) delete mode 100644 scripts/test.json diff --git a/scripts/README.md b/scripts/README.md index e69de29..db724ad 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -0,0 +1,3 @@ +# Pexels Image Fetcher + +A python script to fetch the links of all my featured photos on Pexels. diff --git a/scripts/test.json b/scripts/test.json deleted file mode 100644 index c094def..0000000 --- a/scripts/test.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "images": [ - "https://images.pexels.com/photos/33698467/pexels-photo-33698467/free-photo-of-vancouver-celebration-of-light-fireworks-at-sunset.jpeg", - "https://images.pexels.com/photos/32825663/pexels-photo-32825663/free-photo-of-burrard-bridge-overlooking-vancouver-marina.jpeg", - "https://images.pexels.com/photos/31503261/pexels-photo-31503261/free-photo-of-cherry-blossom-branches-against-blue-sky.jpeg", - "https://images.pexels.com/photos/31503258/pexels-photo-31503258/free-photo-of-close-up-of-blooming-cherry-blossoms-in-spring.jpeg", - "https://images.pexels.com/photos/30565221/pexels-photo-30565221/free-photo-of-snow-covered-roses-in-winter-scene.jpeg", - "https://images.pexels.com/photos/30565220/pexels-photo-30565220/free-photo-of-snow-covered-bench-in-a-winter-park.jpeg", - "https://images.pexels.com/photos/30422637/pexels-photo-30422637/free-photo-of-cluster-of-green-cacti-in-a-desert-setting.jpeg", - "https://images.pexels.com/photos/30422636/pexels-photo-30422636/free-photo-of-serene-bonsai-tree-in-natural-outdoor-setting.jpeg", - "https://images.pexels.com/photos/28797085/pexels-photo-28797085/free-photo-of-vibrant-autumn-foliage-in-a-dense-forest-setting.jpeg", - "https://images.pexels.com/photos/28797082/pexels-photo-28797082/free-photo-of-serene-forest-waterfall-in-lush-greenery.jpeg", - "https://images.pexels.com/photos/28797079/pexels-photo-28797079/free-photo-of-vibrant-autumn-japanese-maple-leaves-in-nature.jpeg", - "https://images.pexels.com/photos/28797071/pexels-photo-28797071/free-photo-of-modern-building-with-autumn-ivy-facade.jpeg", - "https://images.pexels.com/photos/32825665/pexels-photo-32825665/free-photo-of-modern-vancouver-skyscrapers-against-blue-sky.jpeg", - "https://images.pexels.com/photos/32784315/pexels-photo-32784315/free-photo-of-vibrant-yellow-peruvian-lily-in-lush-greenery.jpeg", - "https://images.pexels.com/photos/30565224/pexels-photo-30565224/free-photo-of-winter-wonderland-with-environmental-message.jpeg", - "https://images.pexels.com/photos/30422639/pexels-photo-30422639/free-photo-of-vibrant-pink-flower-in-sunlit-garden.jpeg", - "https://images.pexels.com/photos/28797087/pexels-photo-28797087/free-photo-of-autumn-maple-leaves-illuminated-by-street-lamp.jpeg", - "https://images.pexels.com/photos/28797083/pexels-photo-28797083/free-photo-of-vibrant-green-leaves-in-tranquil-forest-setting.jpeg", - "https://images.pexels.com/photos/28797080/pexels-photo-28797080/free-photo-of-vibrant-green-and-red-maple-leaves-in-autumn.jpeg", - "https://images.pexels.com/photos/28797076/pexels-photo-28797076/free-photo-of-gray-squirrel-eating-on-green-grass.jpeg", - "https://images.pexels.com/photos/28707412/pexels-photo-28707412/free-photo-of-seagull-eating-pastry-on-green-grass-lawn.jpeg", - "https://images.pexels.com/photos/28687366/pexels-photo-28687366/free-photo-of-fresh-seafood-display-at-local-market.jpeg" - ] -} \ No newline at end of file From c37cd66438bcf6bc295fd0b46e7a113248e57a4d Mon Sep 17 00:00:00 2001 From: Perry Zhu Date: Sun, 12 Oct 2025 16:13:37 -0700 Subject: [PATCH 4/5] fix: ci/cd build ignore script --- .github/workflows/build-check.yaml | 2 +- .github/workflows/push-artifacts.yaml | 2 +- package.json | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-check.yaml b/.github/workflows/build-check.yaml index ae5241e..6d70a92 100644 --- a/.github/workflows/build-check.yaml +++ b/.github/workflows/build-check.yaml @@ -18,4 +18,4 @@ jobs: - name: Install Dependencies run: yarn install --frozen-lockfile - name: Build Project - run: yarn run build + run: yarn cibuild diff --git a/.github/workflows/push-artifacts.yaml b/.github/workflows/push-artifacts.yaml index 564fd35..17b1922 100644 --- a/.github/workflows/push-artifacts.yaml +++ b/.github/workflows/push-artifacts.yaml @@ -26,7 +26,7 @@ jobs: - name: Install Dependencies run: yarn install --frozen-lockfile - name: Build Project - run: yarn build + run: yarn cibuild - name: Deploy to S3 run: | aws s3 sync ./dist s3://${{ secrets.AWS_S3_BUCKET }} --delete diff --git a/package.json b/package.json index 49afc28..1a9eac6 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "scripts": { "dev": "scripts/prepare-image-links.sh && vite", "build": "scripts/prepare-image-links.sh && tsc -b && vite build", + "cibuild": "tsc -b && vite build", "lint": "eslint .", "preview": "vite preview --host" }, From fdfe9b6b355e80d22196a993c9ae772a267ee79f Mon Sep 17 00:00:00 2001 From: Perry Zhu Date: Sun, 12 Oct 2025 16:15:34 -0700 Subject: [PATCH 5/5] chore: cleanup comments --- scripts/main.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/main.py b/scripts/main.py index 9e5f282..8b00a3a 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -91,10 +91,6 @@ def get_image_links_selenium(url): click_count += 1 load_more_button.click() - # # Wait for new content to load - # print("Waiting for new content to load...") - # time.sleep(3) - except (StaleElementReferenceException, Exception) as e: # print(f"Error clicking button: {e}") continue