diff --git a/.gitignore b/.gitignore index 4af9dfe..3742713 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ feed*.json *.pyc *.pyo *.pyd + +#Ignore saved data +data/* \ No newline at end of file diff --git a/bbc_world.xml b/bbc_world.xml deleted file mode 100644 index 35a7e7e..0000000 --- a/bbc_world.xml +++ /dev/null @@ -1,242 +0,0 @@ - - - <![CDATA[BBC News]]> - - https://www.bbc.co.uk/news/world - - https://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif - BBC News - https://www.bbc.co.uk/news/world - - RSS for Node - Mon, 23 Jun 2025 16:59:28 GMT - - - - 15 - - <![CDATA[Israel says it struck Tehran's Evin prison and Fordo access routes]]> - - https://www.bbc.com/news/articles/cp8621gnknjo - https://www.bbc.com/news/articles/cp8621gnknjo#0 - Mon, 23 Jun 2025 16:29:51 GMT - - - - <![CDATA['Why wouldn't there be a regime change?' Trump's latest on Iran]]> - - https://www.bbc.com/news/articles/cp8m3861637o - https://www.bbc.com/news/articles/cp8m3861637o#0 - Mon, 23 Jun 2025 13:01:31 GMT - - - - <![CDATA[Bride shot dead in attack on French wedding party]]> - - https://www.bbc.com/news/articles/cn0q8pypxwxo - https://www.bbc.com/news/articles/cn0q8pypxwxo#0 - Mon, 23 Jun 2025 00:50:23 GMT - - - - <![CDATA[Nine killed in Kyiv in intense Russian air attack]]> - - https://www.bbc.com/news/articles/c0k7vll16l3o - https://www.bbc.com/news/articles/c0k7vll16l3o#0 - Mon, 23 Jun 2025 13:12:01 GMT - - - - <![CDATA[Scottish wingsuit flyer Liam Byrne dies during Swiss Alps jump]]> - - https://www.bbc.com/news/articles/cwyepy2zz11o - https://www.bbc.com/news/articles/cwyepy2zz11o#0 - Mon, 23 Jun 2025 13:36:18 GMT - - - - <![CDATA[Rescuers search for Brazilian tourist on Indonesian volcano]]> - - https://www.bbc.com/news/articles/crk60k8gp8jo - https://www.bbc.com/news/articles/crk60k8gp8jo#0 - Mon, 23 Jun 2025 11:10:36 GMT - - - - <![CDATA[First celestial image unveiled from revolutionary telescope]]> - - https://www.bbc.com/news/articles/cj3rmjjgx6xo - https://www.bbc.com/news/articles/cj3rmjjgx6xo#1 - Mon, 23 Jun 2025 16:35:11 GMT - - - - <![CDATA[Nato summit could be the most significant since end of Cold War]]> - - https://www.bbc.com/news/articles/cqjqvr75v1jo - https://www.bbc.com/news/articles/cqjqvr75v1jo#1 - Mon, 23 Jun 2025 14:02:57 GMT - - - - <![CDATA[Police officers charged with murder of Kenyan blogger]]> - - https://www.bbc.com/news/articles/cz09l4k4184o - https://www.bbc.com/news/articles/cz09l4k4184o#1 - Mon, 23 Jun 2025 15:46:31 GMT - - - - <![CDATA[Dozens of soldiers seized by locals in Colombian mountains]]> - - https://www.bbc.com/news/articles/c4gdlrk1pzko - https://www.bbc.com/news/articles/c4gdlrk1pzko#1 - Mon, 23 Jun 2025 04:09:05 GMT - - - - <![CDATA[Belarus opposition leader's husband urges Trump to help other prisoners ]]> - - https://www.bbc.com/news/articles/c07dgmymd1eo - https://www.bbc.com/news/articles/c07dgmymd1eo#1 - Sun, 22 Jun 2025 17:04:56 GMT - - - - <![CDATA[Suicide bombing at Damascus church kills 25, Syrian authorities say]]> - - https://www.bbc.com/news/articles/c307n9p43z9o - https://www.bbc.com/news/articles/c307n9p43z9o#1 - Sun, 22 Jun 2025 21:26:21 GMT - - - - <![CDATA[Lyse Doucet: What the US and Iran do next could be momentous]]> - - https://www.bbc.com/news/articles/cy0w94yj68xo - https://www.bbc.com/news/articles/cy0w94yj68xo#2 - Sun, 22 Jun 2025 21:41:06 GMT - - - - <![CDATA[Decoy flights and seven B-2 stealth bombers - how US says it hit Iran's nuclear sites]]> - - https://www.bbc.com/news/articles/cew0x7159edo - https://www.bbc.com/news/articles/cew0x7159edo#2 - Sun, 22 Jun 2025 21:45:53 GMT - - - - <![CDATA[How China made electric vehicles mainstream]]> - - https://www.bbc.com/news/articles/c2d5ld8y8pwo - https://www.bbc.com/news/articles/c2d5ld8y8pwo#2 - Sun, 22 Jun 2025 23:17:26 GMT - - - - <![CDATA[Caught in the crossfire - the victims of Cape Town's gang warfare]]> - - https://www.bbc.com/news/articles/c1mg2xg339jo - https://www.bbc.com/news/articles/c1mg2xg339jo#2 - Sun, 22 Jun 2025 23:57:19 GMT - - - - <![CDATA['Tourists are our lifeline': Picking up the pieces in Kashmir after April killings]]> - - https://www.bbc.com/news/articles/cly8lx75d6ro - https://www.bbc.com/news/articles/cly8lx75d6ro#2 - Sun, 22 Jun 2025 23:53:10 GMT - - - - <![CDATA[What we know about US strikes on three Iranian nuclear sites]]> - - https://www.bbc.com/news/articles/cvg9r4q99g4o - https://www.bbc.com/news/articles/cvg9r4q99g4o#2 - Mon, 23 Jun 2025 14:29:31 GMT - - - - <![CDATA['Paralysed and constantly caught by surprise': Voices from Iran after US strikes]]> - - https://www.bbc.com/news/articles/cpwq2vnd827o - https://www.bbc.com/news/articles/cpwq2vnd827o#2 - Sun, 22 Jun 2025 16:25:21 GMT - - - - <![CDATA[How a self-styled knight giving away cars and wads of cash got people talking]]> - - https://www.bbc.com/news/articles/cn81ynyjy8no - https://www.bbc.com/news/articles/cn81ynyjy8no#2 - Sun, 22 Jun 2025 01:31:32 GMT - - - - <![CDATA[Watch: Video shows aftermath of Israeli strikes on Iran]]> - - https://www.bbc.com/news/videos/c8xvqje77ygo - https://www.bbc.com/news/videos/c8xvqje77ygo#3 - Mon, 23 Jun 2025 16:44:46 GMT - - - - <![CDATA[Netanyahu and Trump 'totally aligned' on Iran - Barkat]]> - - https://www.bbc.com/news/videos/c3en18eq435o - https://www.bbc.com/news/videos/c3en18eq435o#3 - Mon, 23 Jun 2025 15:31:40 GMT - - - - <![CDATA[Watch: How successful have the US strikes on Iran been?]]> - - https://www.bbc.com/news/videos/cq53l9dvggjo - https://www.bbc.com/news/videos/cq53l9dvggjo#3 - Sun, 22 Jun 2025 20:39:06 GMT - - - - <![CDATA[US strikes on Iran trigger protests internationally]]> - - https://www.bbc.com/news/videos/c4g2wxwel5qo - https://www.bbc.com/news/videos/c4g2wxwel5qo#3 - Mon, 23 Jun 2025 01:46:57 GMT - - - - <![CDATA[Watch: Trump says strikes on Iran a 'spectacular military success' in address]]> - - https://www.bbc.com/news/videos/c93k8nvdnqpo - https://www.bbc.com/news/videos/c93k8nvdnqpo#3 - Sun, 22 Jun 2025 02:52:36 GMT - - - - <![CDATA['Necessary' or 'unfounded'? Americans in Texas respond to US strikes]]> - - https://www.bbc.com/news/videos/c07dg04ke45o - https://www.bbc.com/news/videos/c07dg04ke45o#3 - Sun, 22 Jun 2025 22:47:25 GMT - - - - <![CDATA[Satellite images and decoy planes: Behind the US attacks on Iran]]> - - https://www.bbc.com/news/videos/cdezkx5nl1wo - https://www.bbc.com/news/videos/cdezkx5nl1wo#3 - Sun, 22 Jun 2025 18:02:12 GMT - - - - <![CDATA[Watch: The US has joined the Iran-Israel war. What happens now?]]> - - https://www.bbc.com/news/videos/c5ypw09gdzpo - https://www.bbc.com/news/videos/c5ypw09gdzpo#3 - Sun, 22 Jun 2025 13:58:09 GMT - - - - \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 4382f06..1fe54f8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,9 +28,35 @@ services: dockerfile: .docker/Dockerfile volumes: - .:/workspace:cached + - mongo_data:/data/db depends_on: - redis #entrypoint: ["/usr/local/bin/entrypoint.sh"] command: ["celery", "-A", justinsight.celery, "worker", "--loglevel=info"] #the lowercase j is actually so important + celery_beat: + build: + context: . + dockerfile: .docker/Dockerfile + volumes: + - .:/workspace:cached + depends_on: + - redis + #entrypoint: ["/usr/local/bin/entrypoint.sh"] + command: ["celery", "-A", justinsight.celery, "beat", "--loglevel=info"] #the lowercase j is actually so important + + mongo: + image: mongo:latest + container_name: mongo + restart: unless-stopped + environment: + MONGO_INITDB_ROOT_USERNAME: myuser + MONGO_INITDB_ROOT_PASSWORD: mypassword + ports: + - "27017:27017" + volumes: + - mongo_data:/data/db + - .:/workspace:cached +volumes: + mongo_data: \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1ce978e..18115a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,6 @@ -celery[redis] \ No newline at end of file +beautifulsoup4 +celery[redis] +feedparser +playwright +pymongo +requests \ No newline at end of file diff --git a/src/justinsight/celery.py b/src/justinsight/celery.py index b1bdfa8..0292a0b 100644 --- a/src/justinsight/celery.py +++ b/src/justinsight/celery.py @@ -10,15 +10,23 @@ # Optional beat schedule app.conf.beat_schedule = { - "sample-task-every-10-seconds": { - "task": "justinsight.tasks.sample_task", - "schedule": 10.0, + # "sample-task-every-5-seconds": { + # "task": "justinsight.tasks.sample_task", + # "schedule": 5.0, + # "args": (), + # }, + + "check-BBCfeed-every-5-minutes": { + "task": "justinsight.tasks.bbcLogger_task", + "schedule": 300.0, "args": (), }, - "sample-task-every-5-seconds": { - "task": "justinsight.tasks.sample_task", - "schedule": 5.0, + + "check-NYTfeed-every-5-minutes": { + "task": "justinsight.tasks.nytLogger_task", + "schedule": 300.0, "args": (), }, + #schedule more tasks here } diff --git a/src/justinsight/tasks.py b/src/justinsight/tasks.py index 2b7057a..36b6c33 100644 --- a/src/justinsight/tasks.py +++ b/src/justinsight/tasks.py @@ -1,11 +1,22 @@ # tasks.py from celery import shared_task -from ingest.bbc_rss import testTask +from ingest.bbc_rss import check_and_save_new_entries as check_and_save_bbc +from ingest.nyt_rss import check_and_save_new_entries as check_and_save_nyt @shared_task def sample_task(): - testTask() + print("Sample task!") return "done!" +@shared_task +def bbcLogger_task(): + check_and_save_bbc() + return "BBC RSS Feed checked." + +@shared_task +def nytLogger_task(): + check_and_save_nyt() + return "NYT RSS Feed checked." + #Add more tasks here in the format of the one above \ No newline at end of file