Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Ankit Singh https://github.com/Griffintaur
Tom Faulkner https://github.com/TomFaulkner
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
## Please note that I have made changes that invalidate the information in this README. To run, create the `saved_articles` directory if it isn't pulled from the repo, then use Python 3.6+ and run `python3 news.py`. I'll fix the README soon and provide a better way to install and run. Thanks. - Tom

# News at the Command line
### Want to be kept updated without visiting the news portals every now and then

Expand All @@ -9,19 +7,24 @@
# Modules Requirements

- **Python 3.6+**
- **Requests**
- **Beautiful Soup**
- **Requests**
- **Beautiful Soup**
- **PyYAML**

To install the module dependencies before running the application, simply navigate into the project folder and run `pip install -r requirements.txt`.
# Installation
1. `git clone` the repository, preferably into a virtual environment.
2. Copy `config.yml` into your home directory.
3. Run with `newsctl`

At present, `config.yml` is only read from the current working directory when the script is run; I'll fix this soon.

# Working
- All sample input images are placed under the **Images** folder.
- You can change the maximum number of posts in **config.yml**. Look for **Limit** attribute.

# How To Use
Make sure you have installed required libraries, instructions above.
Just run the main.py, do this by typing `py main.py`.
Just run the main.py, do this by typing `py main.py`.
The rest is quite straightforward.

# Contributing
Expand All @@ -32,4 +35,3 @@ Please open an issue on GitHub if you'd like to report a bug or request a featur

## License
The code is released under MIT license and free to use.

10 changes: 0 additions & 10 deletions TODO.md

This file was deleted.

28 changes: 13 additions & 15 deletions config.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
WebsiteSupported:
- the-huffington-post
- the-new-york-times
- bbc-news
- bloomberg
- the-guardian-uk
- the-hindu
- the-times-of-india

# Posts shown
Limit: 10

Apikey: bda5818cc2af461e98330ccdf6fb9cbe


WebsiteSupported:
- the-huffington-post
- the-new-york-times
- bbc-news
- bloomberg
- the-guardian-uk
- the-hindu
- the-times-of-india

# Posts shown
Limit: 10

Apikey: bda5818cc2af461e98330ccdf6fb9cbe
16 changes: 0 additions & 16 deletions config_reader.py

This file was deleted.

133 changes: 0 additions & 133 deletions extractor.py

This file was deleted.

Empty file added news/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions news/__version__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Application name; news/config_reader.py passes it to AppDirs to locate the
# per-user configuration directory.
__app_name__ = 'newsctl'
# Version string of the package.
__version__ = '0.0.1'
26 changes: 26 additions & 0 deletions news/config_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
from contextlib import suppress

import yaml
from appdirs import AppDirs

from .__version__ import __app_name__
from .constants import constants

dirs = AppDirs(__app_name__)


class ConfigurationReader:
    """Load settings from ``config.yml`` in the per-user config directory.

    The file is looked up in ``dirs.user_config_dir``. If it does not exist,
    the directory is created (when needed), the defaults from
    ``constants['config_defaults']`` are written to a new ``config.yml``, and
    those defaults are used for this run.

    Keys missing from an existing file (including a completely empty file)
    fall back to the corresponding default instead of raising.

    Attributes:
        APIKEY: Value of the ``api_key`` setting.
        limit: Value of the ``article_limit`` setting.
    """

    def __init__(self):
        defaults = constants['config_defaults']
        config_path = os.path.join(dirs.user_config_dir, 'config.yml')

        try:
            with open(config_path) as ymlfile:
                # safe_load only builds plain Python data; bare yaml.load()
                # can construct arbitrary objects and requires an explicit
                # Loader argument on PyYAML 6 and later.
                loaded = yaml.safe_load(ymlfile)
        except FileNotFoundError:
            # First run: create the config directory and seed the file.
            with suppress(FileExistsError):
                os.makedirs(dirs.user_config_dir)
            with open(config_path, 'w') as ymlfile:
                ymlfile.write(yaml.dump(defaults))
            loaded = defaults

        # An empty YAML document parses to None; treat it as "no overrides".
        cfg = {**defaults, **(loaded or {})}

        self.APIKEY = cfg['api_key']
        self.limit = cfg['article_limit']
6 changes: 6 additions & 0 deletions news/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Package-wide constants.
# 'config_defaults' holds the settings that ConfigurationReader writes to a new
# config.yml and uses when no user configuration file exists.
# NOTE(review): the default api_key is a credential committed to source
# control -- confirm this is intentional.
constants = {
    'config_defaults': {
        'api_key': 'bda5818cc2af461e98330ccdf6fb9cbe',
        # Default value for the article_limit setting.
        'article_limit': 10,
    }
}
111 changes: 50 additions & 61 deletions extract_main_content.py → news/extract_main_content.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,50 @@
import requests
from config_reader import ConfigurationReader
from extractor import *
import textwrap


class ExtractMainContent:
    """Fetch an article and print or save the text pulled out of it.

    Each configured website name is paired, by position, with one extractor
    instance; ``source`` selects which extractor handles ``articleurl``.
    """

    def __init__(self, source, articleurl):
        # Order matters: entry i serves the i-th configured website.
        self.extractorlist = [
            HuffingtonPost(),
            NYT(),
            BBC(),
            BloomBerg(),
            Guardian(),
            TheHindu(),
            TimesOfIndia(),
        ]
        supported = ConfigurationReader().websites_supported
        self.Mapping = {
            name: self.extractorlist[position]
            for position, name in enumerate(supported)
        }
        self.Source = source
        self.url = articleurl
        self.textWrap = textwrap.TextWrapper(
            width=100, initial_indent='\t', subsequent_indent='\t')

    def download(self):
        """Return the body text of an HTTP GET on the article URL."""
        user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/59.0.3071.115 Safari/537.36')
        response = requests.get(self.url, headers={'User-Agent': user_agent})
        return response.text

    # unused, but may be useful in the future
    # def AddExtractorList(self, extractor):
    #     self.extractorlist.append(extractor)

    def _extract(self):
        """Run the extractor chosen for ``self.Source`` on the downloaded page."""
        # The strategy lookup happens first, so an unknown source fails
        # before any network request is made.
        self.ExtractStrategy = self.Mapping[self.Source]
        page = self.download()
        return self.ExtractStrategy.extractor(page)

    def beautify(self):
        """Print the article title and wrapped body to standard output."""
        title, output = self._extract()
        rule = "=" * (len(title) + 15)
        print(rule)
        print("\t" + title)
        print(rule)

        wrapped = self.textWrap.fill(output)
        print(wrapped)
        print("*" * 80)
        if not len(output):
            # Nothing was extracted: point the reader at the original page.
            print("Sorry :(")
            print("There isn't much text on the site besides video/image. To "
                  "further view the media post, Go to the below link")
            print(self.url)
            print('*' * 80)
        print("\n\n")

    def save(self):
        """Write the article body to ``saved_articles/<title>.txt``."""
        title, output = self._extract()

        # Drop characters that are not allowed in file names.
        forbidden = str.maketrans('', '', '<>/:"\\|?*')
        title = title.translate(forbidden)

        with open(f'saved_articles/{title}.txt', "w+") as handle:
            handle.write(output)
import requests
import textwrap

from .reader_plugins.plugin_registration import sites


class ExtractMainContent:
    """Download an article and print or save its extracted text.

    Args:
        source: Key into the ``sites`` plugin registry; the registered class
            is instantiated and its ``extractor(text)`` method is called.
        articleurl: URL of the article to download.
    """

    # Characters removed from a title before it is used as a file name.
    _FORBIDDEN_FILENAME_CHARS = '<>/:"\\|?*'
    # Seconds to wait on the server before giving up; without a timeout
    # requests may block indefinitely.
    _REQUEST_TIMEOUT = 10

    def __init__(self, source, articleurl):
        self.source = source
        self.url = articleurl
        self.textWrap = textwrap.TextWrapper(
            initial_indent='\t', subsequent_indent='\t', width=100)

    def download(self):
        """Return the response body of an HTTP GET on ``self.url``.

        Raises:
            requests.exceptions.Timeout: If the server does not respond
                within ``_REQUEST_TIMEOUT`` seconds.
        """
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                 'AppleWebKit/537.36 (KHTML, like Gecko) '
                                 'Chrome/59.0.3071.115 Safari/537.36'}
        req = requests.get(self.url, headers=headers,
                           timeout=self._REQUEST_TIMEOUT)
        return req.text

    def _extract(self):
        """Download the page and return what the source's extractor yields.

        Callers unpack the result as ``(title, output)``.

        Raises:
            KeyError: If ``self.source`` is not registered in ``sites``.
        """
        text = self.download()
        return sites[self.source]().extractor(text)

    def beautify(self):
        """Print the article title and wrapped body to standard output."""
        title, output = self._extract()
        print("=" * (len(title) + 15))
        print("\t" + title)
        print("=" * (len(title) + 15))

        print((self.textWrap.fill(output)))  # wrap of the line
        print("*" * 80)
        if not output:
            # Nothing was extracted: point the reader at the original page.
            print("Sorry :(")
            print("There isn't much text on the site besides video/image. To "
                  "further view the media post, Go to the below link")
            print(self.url)
            print('*' * 80)
        print("\n\n")

    @staticmethod
    def _safe_filename(title):
        """Return ``title`` without forbidden file-name characters.

        Falls back to ``'untitled'`` when nothing is left, so the saved file
        is never named just ``.txt``.
        """
        table = str.maketrans('', '', ExtractMainContent._FORBIDDEN_FILENAME_CHARS)
        return title.translate(table) or 'untitled'

    def save(self):
        """Write the article body to ``saved_articles/<title>.txt``.

        The ``saved_articles`` directory is created if it does not exist, and
        the file is written as UTF-8 so non-ASCII article text does not
        depend on the platform's default encoding.
        """
        import os

        title, output = self._extract()
        title = self._safe_filename(title)

        os.makedirs('saved_articles', exist_ok=True)
        with open(f'saved_articles/{title}.txt', "w", encoding="utf-8") as f:
            f.write(output)
Loading