From 47aa4dc45accaaf1f0f4b7f96c99e01f899d9d35 Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Tue, 20 Feb 2018 20:26:33 -0600 Subject: [PATCH 1/7] dos2unix... move files --- news/__init__.py | 0 news/config_reader.py | 16 +++++ news/extract_main_content.py | 61 ++++++++++++++++ news/extractor.py | 133 +++++++++++++++++++++++++++++++++++ news/news.py | 113 +++++++++++++++++++++++++++++ news/news_pulling.py | 92 ++++++++++++++++++++++++ 6 files changed, 415 insertions(+) create mode 100644 news/__init__.py create mode 100644 news/config_reader.py create mode 100644 news/extract_main_content.py create mode 100644 news/extractor.py create mode 100755 news/news.py create mode 100644 news/news_pulling.py diff --git a/news/__init__.py b/news/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/news/config_reader.py b/news/config_reader.py new file mode 100644 index 0000000..26831b3 --- /dev/null +++ b/news/config_reader.py @@ -0,0 +1,16 @@ +import yaml + + +class ConfigurationReader: + def __init__(self): + with open('config.yml') as ymlfile: + cfg = yaml.load(ymlfile) + self.APIKEY = cfg['Apikey'] + self.limit = cfg['Limit'] + self.websites_supported = cfg['WebsiteSupported'] + + # TODO: Move to using this, and reading it from env, config, defaults + self.user_agent = cfg.get('User-Agent', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' + ' AppleWebKit/537.36 (KHTML, like Gecko ' + 'Chrome/59.0.3071.115 Safari/537.36') diff --git a/news/extract_main_content.py b/news/extract_main_content.py new file mode 100644 index 0000000..2f00438 --- /dev/null +++ b/news/extract_main_content.py @@ -0,0 +1,61 @@ +import requests +from config_reader import ConfigurationReader +from extractor import * +import textwrap + + +class ExtractMainContent: + def __init__(self, source, articleurl): + self.extractorlist = [HuffingtonPost(), NYT(), BBC( + ), BloomBerg(), Guardian(), TheHindu(), TimesOfIndia()] + websites = ConfigurationReader().websites_supported + self.Mapping = 
{} + for index, website in enumerate(websites): + self.Mapping[website] = self.extractorlist[index] + self.Source = source + self.url = articleurl + self.textWrap = textwrap.TextWrapper( + initial_indent='\t', subsequent_indent='\t', width=100) + + def download(self): + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/59.0.3071.115 Safari/537.36'} + req = requests.get(self.url, headers=headers) + return req.text + + # unused, but may be useful in the future + # def AddExtractorList(self, extractor): + # self.extractorlist.append(extractor) + + def _extract(self): + self.ExtractStrategy = self.Mapping[self.Source] + text = self.download() + return self.ExtractStrategy.extractor(text) + + def beautify(self): + title, output = self._extract() + print("=" * (len(title) + 15)) + print("\t" + title) + print("=" * (len(title) + 15)) + + print((self.textWrap.fill(output))) # wrap of the line + print("*" * 80) + if len(output) == 0: + print("Sorry :(") + print("There isn't much text on the site besides video/image. 
To " + "further view the media post, Go to the below link") + print(self.url) + print('*' * 80) + print("\n\n") + + def save(self): + title, output = self._extract() + + # Remove Chars not allowed in filenames + for char in ['<', '>', "/", ":", '"', "\\", "|", "?", "*"]: + if char in title: + title = title.replace(char, "") + + with open(f'saved_articles/{title}.txt', "w+") as f: + f.write(output) diff --git a/news/extractor.py b/news/extractor.py new file mode 100644 index 0000000..8e1ac53 --- /dev/null +++ b/news/extractor.py @@ -0,0 +1,133 @@ +from bs4 import BeautifulSoup + + +class Extractor: + + def extractor(self, text): + pass + + def _extraction_algo(self, text, htmlelement, classname): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + result = [] + # print soup + maincontent = soup.find_all(htmlelement, class_=classname) + # print maincontent + for content in maincontent: + scripttags = content.find_all(["script", "br", "figure", "image"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + result.append(content.text) + result = ''.join(result) + return (title, result) + + +class HuffingtonPost(Extractor): + """class for Huffington Post parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + return self._extraction_algo(text, "div", "content-list-component text") + + +class NYT(Extractor): + """class for New York Times parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + return self._extraction_algo(text, "p", "story-body-text story-content") + + +class BBC(Extractor): + """class for BBC News parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + return self._extraction_algo(text, "div", "story-body__inner") + + +class BloomBerg(Extractor): + """class for BloomBerg parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + return 
self._extraction_algo(text, "div", "body-copy") + + +class Guardian(Extractor): + """class for Guardian parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all( + "div", class_="content__article-body from-content-api js-article__body") + # print maincontent + for content in maincontent: + scripttags = content.find_all(["script", "br", "figure", "image"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + for foundcontent in content.find_all("p"): + Result.append(foundcontent.text) + Result = ''.join(Result) + return (title, Result) + + +class TheHindu(Extractor): + """class for BloomBerg parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all("div", class_="article") + # print maincontent + for content in maincontent: + scripttags = content.find_all( + ["script", "br", "figure", "image", "span"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + for foundcontent in content.find_all("p"): + Result.append(foundcontent.text) + Result = ''.join(Result) + return (title, Result) + + +class TimesOfIndia(Extractor): + """class for BloomBerg parsing""" + + def __init__(self): + Extractor.__init__(self) + + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all("div", class_="Normal") + # print maincontent + for content in maincontent: + # print content.text + Result.append(content.text) + Result = ''.join(Result) + return (title, Result) diff --git a/news/news.py b/news/news.py new file mode 100755 index 0000000..5448741 --- /dev/null +++ b/news/news.py @@ -0,0 +1,113 @@ +#!/usr/bin/env 
python3 + +import sys +from enum import Enum + +from news_pulling import NewsPulling +from config_reader import ConfigurationReader +from extract_main_content import ExtractMainContent + + +class SelectionStatus(Enum): + BACK = 1 + EXIT = 2 + READ = 3 + + +def news_sources(): + news_sources = ConfigurationReader().websites_supported + return news_sources + + +def display_sources(sources): + for index, source in enumerate(sources): + print(f'[{index + 1}]\t{source}') + print("\nPlease enter the index of the news source or type 'quit' to exit") + + +def display_title_banner(): + # Cool Title/Banner + print("=" * 40) + print("\tNews at the Command Line") + print("=" * 40) + print() + + +def prompt_for_source(sources): + while True: + display_sources(sources) + source_choice = input("News Source Number >>>> ") + # Quit + if(source_choice.lower() == "quit"): + sys.exit() + try: + source_choice = int(source_choice) - 1 + if(source_choice >= len(sources) or source_choice < 0): + print("Please select an index between 1-" + + str(len(sources))) + else: + return source_choice + except ValueError: + print("That is not a valid News Source Number") + + +def prompt_for_article(max=0): + print("Do you want to read a story further? If yes, please select the" + "number corresponding to the article") + print("Enter 'back' to go back to the main menu") + print("Press 'quit' to quit") + while True: + article_selection = input("Article No >>>> ") + + # Back + if(article_selection.lower()[0] == 'b'): + return SelectionStatus.BACK, None + # Exit + elif(article_selection.lower()[0] == 'q'): + return SelectionStatus.EXIT, None + + article_selection = int(article_selection) + if 0 > article_selection - 1 or article_selection > max: + print(f'Please select an index between 1-{max}.') + else: + return SelectionStatus.READ, article_selection - 1 + + +def prompt_for_save(): + while True: + print("Do you want to save this article in file") + selection = str(input("Want to save? 
y/n >>> ")) + if selection[0].lower() == 'y': + return True + elif selection[0].lower() == 'n': + return False + + +def main(): + display_title_banner() + + while True: + sources = news_sources() + source_choice = prompt_for_source(sources) + + while True: + puller = NewsPulling(sources[source_choice]) + articles = puller.beautify_articles() + status, article_selection = prompt_for_article(max=len(articles)) + if status == SelectionStatus.EXIT: + sys.exit() + elif status == SelectionStatus.BACK: + break + else: + print("\n" * 5) + extr = ExtractMainContent( + sources[source_choice], articles[article_selection][2]) + extr.beautify() + + if prompt_for_save(): + extr.save() + print("File saved!\n") + + +if __name__ == "__main__": + main() diff --git a/news/news_pulling.py b/news/news_pulling.py new file mode 100644 index 0000000..613829d --- /dev/null +++ b/news/news_pulling.py @@ -0,0 +1,92 @@ +import sys + +import requests +from requests import ConnectionError + +from config_reader import ConfigurationReader + + +class NewsPulling: + """This class is used to pull news from the internet depending on the source specified """ + + def __init__(self, newsSource): + self.Source = newsSource + + def pull_news(self): + config = ConfigurationReader() + self.__APIKey = config.APIKEY + self.__Limit = config.limit + url = 'https://newsapi.org/v1/articles?source=' + \ + self.Source + '&sortBy=top&apiKey=' + self.__APIKey + try: + req = requests.get(url) + if(req.status_code == 200): + return req + else: + print( + "There is some issue in connecting to the internet. 
Please check your firewall or internet") + except ConnectionError as e: + print("A connection Attempt failed") + print(e.message) + sys.exit() + + def json_read(self): + req = self.pull_news() + # indicate if we need to convert to utf-8 + needsconversion = False + if req.encoding != 'utf-8': + needsconversion = True + req = req.json() + articles = req['articles'] + noofarticles = len(articles) + maxarticles = min(noofarticles, self.__Limit) + + FilteredArticles = [] + + for i in range(maxarticles): + article = articles[i] + if needsconversion: + description = str(article['description'], 'utf-8') + # print description + title = str(article['title'], 'utf-8') + Article_url = str(article['url'], 'utf-8') + DateofPublication = str(article['publishedAt'], 'utf-8') + Author = str(article['author'], 'utf-8') + FilteredArticles.append( + [description, title, Article_url, DateofPublication, Author]) + else: + description = article['description'] + # print description + title = article['title'] + Article_url = article['url'] + DateofPublication = article['publishedAt'] + Author = article['author'] + FilteredArticles.append( + [description, title, Article_url, DateofPublication, Author]) + return FilteredArticles + + def beautify_articles(self): + self.Articles = self.json_read() + if self.Articles is None or len(self.Articles) == 0: + print("No articles found") + sys.exit() + print("\n" + ("=" * 16) + " STORIES " + ("=" * 16)) + for i in range(len(self.Articles)): + print("[" + str(i + 1) + "]", end=' ') + # Title + if self.Articles[i][1] is not None: + print("\t" + self.Articles[i][1]) + # Summary + if self.Articles[i][0] is not None: + # Limit Summary Size + summary = self.Articles[i][0][:85] + \ + (self.Articles[i][0][85:] and '...') + print("\t" + summary) + # Author + if self.Articles[i][4] is not None: + print("\t" + self.Articles[i][4]) + # Date + if self.Articles[i][3] is not None: + print("\t" + self.Articles[i][3] + "\n") + print("=" * 40) + return self.Articles 
From d53f551dc4efe71ab96f22a1727c2f76893a711f Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Tue, 20 Feb 2018 20:59:57 -0600 Subject: [PATCH 2/7] dos2unix everything else --- CONTRIBUTORS.md | 2 + __version__.py | 1 + config.yml | 28 ++++---- config_reader.py | 16 ----- extract_main_content.py | 61 ---------------- extractor.py | 133 ----------------------------------- news.py | 111 ----------------------------- news/extract_main_content.py | 5 +- news/news.py | 6 +- news/news_pulling.py | 2 +- news_pulling.py | 92 ------------------------ requirements.txt | 1 + setup.py | 36 ++++++++++ 13 files changed, 60 insertions(+), 434 deletions(-) create mode 100644 CONTRIBUTORS.md create mode 100644 __version__.py delete mode 100644 config_reader.py delete mode 100644 extract_main_content.py delete mode 100644 extractor.py delete mode 100644 news.py delete mode 100644 news_pulling.py create mode 100644 setup.py diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000..fee71c8 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,2 @@ +Ankit Singh https://github.com/Griffintaur +Tom Faulkner https://github.com/TomFaulkner diff --git a/__version__.py b/__version__.py new file mode 100644 index 0000000..b8023d8 --- /dev/null +++ b/__version__.py @@ -0,0 +1 @@ +__version__ = '0.0.1' diff --git a/config.yml b/config.yml index b11ddb4..7421ceb 100644 --- a/config.yml +++ b/config.yml @@ -1,15 +1,13 @@ -WebsiteSupported: - - the-huffington-post - - the-new-york-times - - bbc-news - - bloomberg - - the-guardian-uk - - the-hindu - - the-times-of-india - -# Posts shown -Limit: 10 - -Apikey: bda5818cc2af461e98330ccdf6fb9cbe - - \ No newline at end of file +WebsiteSupported: + - the-huffington-post + - the-new-york-times + - bbc-news + - bloomberg + - the-guardian-uk + - the-hindu + - the-times-of-india + +# Posts shown +Limit: 10 + +Apikey: bda5818cc2af461e98330ccdf6fb9cbe diff --git a/config_reader.py b/config_reader.py deleted file mode 100644 index 
0f36639..0000000 --- a/config_reader.py +++ /dev/null @@ -1,16 +0,0 @@ -import yaml - - -class ConfigurationReader: - def __init__(self): - with open('config.yml') as ymlfile: - cfg = yaml.load(ymlfile) - self.APIKEY = cfg['Apikey'] - self.limit = cfg['Limit'] - self.websites_supported = cfg['WebsiteSupported'] - - # TODO: Move to using this, and reading it from env, config, defaults - self.user_agent = cfg.get('User-Agent', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' - ' AppleWebKit/537.36 (KHTML, like Gecko ' - 'Chrome/59.0.3071.115 Safari/537.36') diff --git a/extract_main_content.py b/extract_main_content.py deleted file mode 100644 index d91ea78..0000000 --- a/extract_main_content.py +++ /dev/null @@ -1,61 +0,0 @@ -import requests -from config_reader import ConfigurationReader -from extractor import * -import textwrap - - -class ExtractMainContent: - def __init__(self, source, articleurl): - self.extractorlist = [HuffingtonPost(), NYT(), BBC( - ), BloomBerg(), Guardian(), TheHindu(), TimesOfIndia()] - websites = ConfigurationReader().websites_supported - self.Mapping = {} - for index, website in enumerate(websites): - self.Mapping[website] = self.extractorlist[index] - self.Source = source - self.url = articleurl - self.textWrap = textwrap.TextWrapper( - initial_indent='\t', subsequent_indent='\t', width=100) - - def download(self): - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/59.0.3071.115 Safari/537.36'} - req = requests.get(self.url, headers=headers) - return req.text - - # unused, but may be useful in the future - # def AddExtractorList(self, extractor): - # self.extractorlist.append(extractor) - - def _extract(self): - self.ExtractStrategy = self.Mapping[self.Source] - text = self.download() - return self.ExtractStrategy.extractor(text) - - def beautify(self): - title, output = self._extract() - print("=" * (len(title) + 15)) - print("\t" + title) - print("=" * (len(title) 
+ 15)) - - print((self.textWrap.fill(output))) # wrap of the line - print("*" * 80) - if len(output) == 0: - print("Sorry :(") - print("There isn't much text on the site besides video/image. To " - "further view the media post, Go to the below link") - print(self.url) - print('*' * 80) - print("\n\n") - - def save(self): - title, output = self._extract() - - # Remove Chars not allowed in filenames - for char in ['<', '>', "/", ":", '"', "\\", "|", "?", "*"]: - if char in title: - title = title.replace(char, "") - - with open(f'saved_articles/{title}.txt', "w+") as f: - f.write(output) diff --git a/extractor.py b/extractor.py deleted file mode 100644 index ee985d5..0000000 --- a/extractor.py +++ /dev/null @@ -1,133 +0,0 @@ -from bs4 import BeautifulSoup - - -class Extractor: - - def extractor(self, text): - pass - - def _extraction_algo(self, text, htmlelement, classname): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - result = [] - # print soup - maincontent = soup.find_all(htmlelement, class_=classname) - # print maincontent - for content in maincontent: - scripttags = content.find_all(["script", "br", "figure", "image"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - result.append(content.text) - result = ''.join(result) - return (title, result) - - -class HuffingtonPost(Extractor): - """class for Huffington Post parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "div", "content-list-component text") - - -class NYT(Extractor): - """class for New York Times parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "p", "story-body-text story-content") - - -class BBC(Extractor): - """class for BBC News parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "div", "story-body__inner") - 
- -class BloomBerg(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "div", "body-copy") - - -class Guardian(Extractor): - """class for Guardian parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all( - "div", class_="content__article-body from-content-api js-article__body") - # print maincontent - for content in maincontent: - scripttags = content.find_all(["script", "br", "figure", "image"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - for foundcontent in content.find_all("p"): - Result.append(foundcontent.text) - Result = ''.join(Result) - return (title, Result) - - -class TheHindu(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all("div", class_="article") - # print maincontent - for content in maincontent: - scripttags = content.find_all( - ["script", "br", "figure", "image", "span"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - for foundcontent in content.find_all("p"): - Result.append(foundcontent.text) - Result = ''.join(Result) - return (title, Result) - - -class TimesOfIndia(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all("div", class_="Normal") - # print maincontent - for content in maincontent: - # print content.text - Result.append(content.text) - Result = ''.join(Result) - return (title, 
Result) diff --git a/news.py b/news.py deleted file mode 100644 index 64bf84f..0000000 --- a/news.py +++ /dev/null @@ -1,111 +0,0 @@ -import sys -from enum import Enum - -from news_pulling import NewsPulling -from config_reader import ConfigurationReader -from extract_main_content import ExtractMainContent - - -class SelectionStatus(Enum): - BACK = 1 - EXIT = 2 - READ = 3 - - -def news_sources(): - news_sources = ConfigurationReader().websites_supported - return news_sources - - -def display_sources(sources): - for index, source in enumerate(sources): - print(f'[{index + 1}]\t{source}') - print("\nPlease enter the index of the news source or type 'quit' to exit") - - -def display_title_banner(): - # Cool Title/Banner - print("=" * 40) - print("\tNews at the Command Line") - print("=" * 40) - print() - - -def prompt_for_source(sources): - while True: - display_sources(sources) - source_choice = input("News Source Number >>>> ") - # Quit - if(source_choice.lower() == "quit"): - sys.exit() - try: - source_choice = int(source_choice) - 1 - if(source_choice >= len(sources) or source_choice < 0): - print("Please select an index between 1-" + - str(len(sources))) - else: - return source_choice - except ValueError: - print("That is not a valid News Source Number") - - -def prompt_for_article(max=0): - print("Do you want to read a story further? 
If yes, please select the" - "number corresponding to the article") - print("Enter 'back' to go back to the main menu") - print("Press 'quit' to quit") - while True: - article_selection = input("Article No >>>> ") - - # Back - if(article_selection.lower()[0] == 'b'): - return SelectionStatus.BACK, None - # Exit - elif(article_selection.lower()[0] == 'q'): - return SelectionStatus.EXIT, None - - article_selection = int(article_selection) - if 0 > article_selection - 1 or article_selection > max: - print(f'Please select an index between 1-{max}.') - else: - return SelectionStatus.READ, article_selection - 1 - - -def prompt_for_save(): - while True: - print("Do you want to save this article in file") - selection = str(input("Want to save? y/n >>> ")) - if selection[0].lower() == 'y': - return True - elif selection[0].lower() == 'n': - return False - - -def main(): - display_title_banner() - - while True: - sources = news_sources() - source_choice = prompt_for_source(sources) - - while True: - puller = NewsPulling(sources[source_choice]) - articles = puller.beautify_articles() - status, article_selection = prompt_for_article(max=len(articles)) - if status == SelectionStatus.EXIT: - sys.exit() - elif status == SelectionStatus.BACK: - break - else: - print("\n" * 5) - extr = ExtractMainContent( - sources[source_choice], articles[article_selection][2]) - extr.beautify() - - if prompt_for_save(): - extr.save() - print("File saved!\n") - - -if __name__ == "__main__": - main() diff --git a/news/extract_main_content.py b/news/extract_main_content.py index 2f00438..791ff93 100644 --- a/news/extract_main_content.py +++ b/news/extract_main_content.py @@ -1,8 +1,9 @@ import requests -from config_reader import ConfigurationReader -from extractor import * import textwrap +from .config_reader import ConfigurationReader +from .extractor import * + class ExtractMainContent: def __init__(self, source, articleurl): diff --git a/news/news.py b/news/news.py index 5448741..d74ec56 100755 
--- a/news/news.py +++ b/news/news.py @@ -3,9 +3,9 @@ import sys from enum import Enum -from news_pulling import NewsPulling -from config_reader import ConfigurationReader -from extract_main_content import ExtractMainContent +from .news_pulling import NewsPulling +from .config_reader import ConfigurationReader +from .extract_main_content import ExtractMainContent class SelectionStatus(Enum): diff --git a/news/news_pulling.py b/news/news_pulling.py index 613829d..c0c272b 100644 --- a/news/news_pulling.py +++ b/news/news_pulling.py @@ -3,7 +3,7 @@ import requests from requests import ConnectionError -from config_reader import ConfigurationReader +from .config_reader import ConfigurationReader class NewsPulling: diff --git a/news_pulling.py b/news_pulling.py deleted file mode 100644 index 65c8501..0000000 --- a/news_pulling.py +++ /dev/null @@ -1,92 +0,0 @@ -import sys - -import requests -from requests import ConnectionError - -from config_reader import ConfigurationReader - - -class NewsPulling: - """This class is used to pull news from the internet depending on the source specified """ - - def __init__(self, newsSource): - self.Source = newsSource - - def pull_news(self): - config = ConfigurationReader() - self.__APIKey = config.APIKEY - self.__Limit = config.limit - url = 'https://newsapi.org/v1/articles?source=' + \ - self.Source + '&sortBy=top&apiKey=' + self.__APIKey - try: - req = requests.get(url) - if(req.status_code == 200): - return req - else: - print( - "There is some issue in connecting to the internet. 
Please check your firewall or internet") - except ConnectionError as e: - print("A connection Attempt failed") - print(e.message) - sys.exit() - - def json_read(self): - req = self.pull_news() - # indicate if we need to convert to utf-8 - needsconversion = False - if req.encoding != 'utf-8': - needsconversion = True - req = req.json() - articles = req['articles'] - noofarticles = len(articles) - maxarticles = min(noofarticles, self.__Limit) - - FilteredArticles = [] - - for i in range(maxarticles): - article = articles[i] - if needsconversion: - description = str(article['description'], 'utf-8') - # print description - title = str(article['title'], 'utf-8') - Article_url = str(article['url'], 'utf-8') - DateofPublication = str(article['publishedAt'], 'utf-8') - Author = str(article['author'], 'utf-8') - FilteredArticles.append( - [description, title, Article_url, DateofPublication, Author]) - else: - description = article['description'] - # print description - title = article['title'] - Article_url = article['url'] - DateofPublication = article['publishedAt'] - Author = article['author'] - FilteredArticles.append( - [description, title, Article_url, DateofPublication, Author]) - return FilteredArticles - - def beautify_articles(self): - self.Articles = self.json_read() - if self.Articles is None or len(self.Articles) == 0: - print("No articles found") - sys.exit() - print("\n" + ("=" * 16) + " STORIES " + ("=" * 16)) - for i in range(len(self.Articles)): - print("[" + str(i + 1) + "]", end=' ') - # Title - if self.Articles[i][1] is not None: - print("\t" + self.Articles[i][1]) - # Summary - if self.Articles[i][0] is not None: - # Limit Summary Size - summary = self.Articles[i][0][:85] + \ - (self.Articles[i][0][85:] and '...') - print("\t" + summary) - # Author - if self.Articles[i][4] is not None: - print("\t" + self.Articles[i][4]) - # Date - if self.Articles[i][3] is not None: - print("\t" + self.Articles[i][3] + "\n") - print("=" * 40) - return self.Articles 
diff --git a/requirements.txt b/requirements.txt index 345cd29..f3917db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ idna==2.6 PyYAML==3.12 requests==2.18.4 urllib3==1.22 +wheel diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..562f227 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +from setuptools import setup, find_packages +from os import path + +from __version__ import __version__ +here = path.abspath(path.dirname(__file__)) + +with open('README.md') as f: + long_description = f.read() + +setup( + name="News At The Command Line", + version=__version__, + description="Read your news on your favourite terminal", + author="Ankit Singh", + packages=['news'], + package_dir={'news': 'news'}, + long_description=long_description, + + install_requires=[ + 'bs4>=0.0.1', + 'beautifulsoup4>=4.6.0', + 'PyYAML>=3.12', + 'requests>=2.18.4', + ], + + license='MIT', + entry_points={ + 'console_scripts': [ + 'newsctl=news.news:main' + ] + }, + classifiers=[ + 'Environment :: Console', + 'Intended Audience :: End Users/Desktop', + ] +) From caaab64978a340649e09c7c14ac36adb2b267d3f Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Tue, 20 Feb 2018 21:08:17 -0600 Subject: [PATCH 3/7] readme and todo --- README.md | 16 +++++++++------- TODO.md | 5 +++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e28ea0c..45c6ef9 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -## Please note that I have made changes that invalidate the information in the readme. To run create the saved_articles directory, if it isn't pulled from the repo, then use Python 3.6+ and run `python3 news.py`. I'll fix the readme soon, and provide a better way to install and run. Thanks. 
- Tom - # News at the Command line ### Want to be kept updated without visiting the news portals every now and then @@ -9,11 +7,16 @@ # Modules Requirements - **Python 3.6+** -- **Requests** -- **Beautiful Soup** +- **Requests** +- **Beautiful Soup** - **PyYAML** -To install the module dependencies before running the application, simply navigate into the project folder and run `pip install -r requirements.txt`. +# Installation +1. `git clone` the repository, preferably into a virtual environment. +2. Copy `config.yml` into your home directory. +3. Run with `newsctl` + +At present `config.yml` is only read from pwd when the script is run, I'll fix this soon. # Working - All sample input images are placed under the **Images** folder. @@ -21,7 +24,7 @@ To install the module dependencies before running the application, simply naviga # How To Use Make sure you have installed required libraries, instructions above. - Just run the main.py, do this by typing `py main.py`. + Just run the main.py, do this by typing `py main.py`. The rest is quite straight forward. # Contributing @@ -32,4 +35,3 @@ Please open an issue on GitHub if you'd like to report a bug or request a featur ## License The code is released under MIT license and free to use. 
- diff --git a/TODO.md b/TODO.md index 3795fbf..7a3ebe3 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,7 @@ -* Move all files to proper locations (./news) +* ~~ Move all files to proper locations (./news) ~~ +* Read config.yml from a default location, or ENV variable location * Test all the things -* Update README +* Update README (further updates) * Read from environment variables + config + command line args * Edit configuration in program * Move to plugins for news sources From 7bfaab89927e2e8d6f780325f9bf91c1f3455e39 Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Tue, 20 Feb 2018 21:20:02 -0600 Subject: [PATCH 4/7] Update TODO.md --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index 7a3ebe3..e3d83e0 100644 --- a/TODO.md +++ b/TODO.md @@ -1,4 +1,4 @@ -* ~~ Move all files to proper locations (./news) ~~ +* ~~Move all files to proper locations (./news)~~ * Read config.yml from a default location, or ENV variable location * Test all the things * Update README (further updates) From 4db0e8fb9b4bc621a70f560cd085042afecbf8df Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Tue, 20 Feb 2018 21:21:59 -0600 Subject: [PATCH 5/7] Delete TODO.md --- TODO.md | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 TODO.md diff --git a/TODO.md b/TODO.md deleted file mode 100644 index e3d83e0..0000000 --- a/TODO.md +++ /dev/null @@ -1,11 +0,0 @@ -* ~~Move all files to proper locations (./news)~~ -* Read config.yml from a default location, or ENV variable location -* Test all the things -* Update README (further updates) -* Read from environment variables + config + command line args -* Edit configuration in program -* Move to plugins for news sources -* Move all prompts to string constants file for easy changes and translations -* Dependency inject BeautifulSoup in extractor - -* Consider REST client / server architecture From 0615d64ad0002a6af05f9c854b982dfb5bc838da Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: 
Tue, 20 Feb 2018 23:13:06 -0600 Subject: [PATCH 6/7] plugin system in place, config moved --- __version__.py | 1 - news/__version__.py | 2 + news/config_reader.py | 26 +++- news/constants.py | 6 + news/extract_main_content.py | 17 ++- news/extractor.py | 133 --------------------- news/news.py | 5 +- news/news_pulling.py | 16 ++- news/reader.py | 19 +++ news/reader_plugins/bbc.py | 9 ++ news/reader_plugins/bloomberg.py | 7 ++ news/reader_plugins/guardian.py | 26 ++++ news/reader_plugins/hindu.py | 24 ++++ news/reader_plugins/huffington_post.py | 8 ++ news/reader_plugins/new_york_times.py | 8 ++ news/reader_plugins/plugin_registration.py | 17 +++ news/reader_plugins/times_of_india.py | 19 +++ requirements.txt | 1 + setup.py | 2 +- 19 files changed, 190 insertions(+), 156 deletions(-) delete mode 100644 __version__.py create mode 100644 news/__version__.py create mode 100644 news/constants.py delete mode 100644 news/extractor.py create mode 100644 news/reader.py create mode 100644 news/reader_plugins/bbc.py create mode 100644 news/reader_plugins/bloomberg.py create mode 100644 news/reader_plugins/guardian.py create mode 100644 news/reader_plugins/hindu.py create mode 100644 news/reader_plugins/huffington_post.py create mode 100644 news/reader_plugins/new_york_times.py create mode 100644 news/reader_plugins/plugin_registration.py create mode 100644 news/reader_plugins/times_of_india.py diff --git a/__version__.py b/__version__.py deleted file mode 100644 index b8023d8..0000000 --- a/__version__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.0.1' diff --git a/news/__version__.py b/news/__version__.py new file mode 100644 index 0000000..75c1d9c --- /dev/null +++ b/news/__version__.py @@ -0,0 +1,2 @@ +__app_name__ = 'newsctl' +__version__ = '0.0.1' diff --git a/news/config_reader.py b/news/config_reader.py index 26831b3..d68755d 100644 --- a/news/config_reader.py +++ b/news/config_reader.py @@ -1,13 +1,29 @@ +import os +from contextlib import suppress + import yaml 
+from appdirs import AppDirs + +from .__version__ import __app_name__ +from .constants import constants + +dirs = AppDirs(__app_name__) class ConfigurationReader: def __init__(self): - with open('config.yml') as ymlfile: - cfg = yaml.load(ymlfile) - self.APIKEY = cfg['Apikey'] - self.limit = cfg['Limit'] - self.websites_supported = cfg['WebsiteSupported'] + try: + with open(f'{dirs.user_config_dir}/config.yml') as ymlfile: + cfg = yaml.load(ymlfile) + except FileNotFoundError: + with suppress(FileExistsError): + os.makedirs(dirs.user_config_dir) + with open(f'{dirs.user_config_dir}/config.yml', 'w') as ymlfile: + ymlfile.write(yaml.dump(constants['config_defaults'])) + cfg = constants['config_defaults'] + + self.APIKEY = cfg['api_key'] + self.limit = cfg['article_limit'] # TODO: Move to using this, and reading it from env, config, defaults self.user_agent = cfg.get('User-Agent', diff --git a/news/constants.py b/news/constants.py new file mode 100644 index 0000000..cb258e2 --- /dev/null +++ b/news/constants.py @@ -0,0 +1,6 @@ +constants = { + 'config_defaults': { + 'api_key': 'bda5818cc2af461e98330ccdf6fb9cbe', + 'article_limit': 10, + } +} diff --git a/news/extract_main_content.py b/news/extract_main_content.py index 791ff93..576a3da 100644 --- a/news/extract_main_content.py +++ b/news/extract_main_content.py @@ -2,18 +2,17 @@ import textwrap from .config_reader import ConfigurationReader -from .extractor import * +from .reader_plugins.plugin_registration import sites +from .reader import Reader class ExtractMainContent: def __init__(self, source, articleurl): - self.extractorlist = [HuffingtonPost(), NYT(), BBC( - ), BloomBerg(), Guardian(), TheHindu(), TimesOfIndia()] - websites = ConfigurationReader().websites_supported - self.Mapping = {} - for index, website in enumerate(websites): - self.Mapping[website] = self.extractorlist[index] - self.Source = source + + self.mapping = {} + for index, website in enumerate(sites): + self.mapping[website] = 
self.extractorlist[index] + self.source = source self.url = articleurl self.textWrap = textwrap.TextWrapper( initial_indent='\t', subsequent_indent='\t', width=100) @@ -30,7 +29,7 @@ def download(self): # self.extractorlist.append(extractor) def _extract(self): - self.ExtractStrategy = self.Mapping[self.Source] + self.ExtractStrategy = self.mapping[self.source] text = self.download() return self.ExtractStrategy.extractor(text) diff --git a/news/extractor.py b/news/extractor.py deleted file mode 100644 index 8e1ac53..0000000 --- a/news/extractor.py +++ /dev/null @@ -1,133 +0,0 @@ -from bs4 import BeautifulSoup - - -class Extractor: - - def extractor(self, text): - pass - - def _extraction_algo(self, text, htmlelement, classname): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - result = [] - # print soup - maincontent = soup.find_all(htmlelement, class_=classname) - # print maincontent - for content in maincontent: - scripttags = content.find_all(["script", "br", "figure", "image"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - result.append(content.text) - result = ''.join(result) - return (title, result) - - -class HuffingtonPost(Extractor): - """class for Huffington Post parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "div", "content-list-component text") - - -class NYT(Extractor): - """class for New York Times parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "p", "story-body-text story-content") - - -class BBC(Extractor): - """class for BBC News parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - return self._extraction_algo(text, "div", "story-body__inner") - - -class BloomBerg(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, 
text): - return self._extraction_algo(text, "div", "body-copy") - - -class Guardian(Extractor): - """class for Guardian parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all( - "div", class_="content__article-body from-content-api js-article__body") - # print maincontent - for content in maincontent: - scripttags = content.find_all(["script", "br", "figure", "image"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - for foundcontent in content.find_all("p"): - Result.append(foundcontent.text) - Result = ''.join(Result) - return (title, Result) - - -class TheHindu(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all("div", class_="article") - # print maincontent - for content in maincontent: - scripttags = content.find_all( - ["script", "br", "figure", "image", "span"]) - for scripttag in scripttags: - scripttag.extract() - # print content.text - for foundcontent in content.find_all("p"): - Result.append(foundcontent.text) - Result = ''.join(Result) - return (title, Result) - - -class TimesOfIndia(Extractor): - """class for BloomBerg parsing""" - - def __init__(self): - Extractor.__init__(self) - - def extractor(self, text): - soup = BeautifulSoup(text, 'html.parser') - title = soup.title.string - Result = [] - # print soup - maincontent = soup.find_all("div", class_="Normal") - # print maincontent - for content in maincontent: - # print content.text - Result.append(content.text) - Result = ''.join(Result) - return (title, Result) diff --git a/news/news.py b/news/news.py index d74ec56..3f5421a 100755 --- a/news/news.py +++ b/news/news.py @@ -4,8 +4,8 @@ from enum 
import Enum from .news_pulling import NewsPulling -from .config_reader import ConfigurationReader from .extract_main_content import ExtractMainContent +from .reader_plugins.plugin_registration import sites class SelectionStatus(Enum): @@ -15,7 +15,8 @@ class SelectionStatus(Enum): def news_sources(): - news_sources = ConfigurationReader().websites_supported + news_sources = tuple(sites.keys()) + print('news sources', news_sources) return news_sources diff --git a/news/news_pulling.py b/news/news_pulling.py index c0c272b..b80015e 100644 --- a/news/news_pulling.py +++ b/news/news_pulling.py @@ -5,21 +5,27 @@ from .config_reader import ConfigurationReader +# TODO: Take reader plugin, read name from it for source in pull_news +# TODO: Rewrite this entire module +# TODO: Rewrite everything +# TODO: make newsapi.org replaceable, especially since there is a v2 api + class NewsPulling: - """This class is used to pull news from the internet depending on the source specified """ + """Pull news from the internet depending on the source specified.""" - def __init__(self, newsSource): - self.Source = newsSource + def __init__(self, source): + self.source = source def pull_news(self): config = ConfigurationReader() - self.__APIKey = config.APIKEY self.__Limit = config.limit url = 'https://newsapi.org/v1/articles?source=' + \ - self.Source + '&sortBy=top&apiKey=' + self.__APIKey + self.source + '&sortBy=top&apiKey=' + config.APIKEY + print(url) try: req = requests.get(url) + print(req) if(req.status_code == 200): return req else: diff --git a/news/reader.py b/news/reader.py new file mode 100644 index 0000000..f2fcc0a --- /dev/null +++ b/news/reader.py @@ -0,0 +1,19 @@ +from bs4 import BeautifulSoup + + +class Reader: + def _extraction_algo(self, text, htmlelement, classname): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + result = [] + # print soup + maincontent = soup.find_all(htmlelement, class_=classname) + # print maincontent + for content in 
maincontent: + scripttags = content.find_all(["script", "br", "figure", "image"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + result.append(content.text) + result = ''.join(result) + return (title, result) diff --git a/news/reader_plugins/bbc.py b/news/reader_plugins/bbc.py new file mode 100644 index 0000000..bafb167 --- /dev/null +++ b/news/reader_plugins/bbc.py @@ -0,0 +1,9 @@ +from news.reader import Reader + + +class BBC(Reader): + """class for BBC News parsing""" + source_name = 'bbc-news' + + def extractor(self, text): + return self._extraction_algo(text, "div", "story-body__inner") diff --git a/news/reader_plugins/bloomberg.py b/news/reader_plugins/bloomberg.py new file mode 100644 index 0000000..848060d --- /dev/null +++ b/news/reader_plugins/bloomberg.py @@ -0,0 +1,7 @@ +from news.reader import Reader + + +class Bloomberg(Reader): + """class for BloomBerg parsing""" + def extractor(self, text): + return self._extraction_algo(text, "div", "body-copy") diff --git a/news/reader_plugins/guardian.py b/news/reader_plugins/guardian.py new file mode 100644 index 0000000..a05377f --- /dev/null +++ b/news/reader_plugins/guardian.py @@ -0,0 +1,26 @@ +from bs4 import BeautifulSoup + +from news.reader import Reader + + +class Guardian(Reader): + """class for Guardian parsing""" + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all( + "div", + class_="content__article-body from-content-api js-article__body" + ) + # print maincontent + for content in maincontent: + scripttags = content.find_all(["script", "br", "figure", "image"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + for foundcontent in content.find_all("p"): + Result.append(foundcontent.text) + Result = ''.join(Result) + return (title, Result) diff --git a/news/reader_plugins/hindu.py b/news/reader_plugins/hindu.py new file mode 100644 index 
0000000..6e32f65 --- /dev/null +++ b/news/reader_plugins/hindu.py @@ -0,0 +1,24 @@ +from bs4 import BeautifulSoup + +from news.reader import Reader + + +class TheHindu(Reader): + """class for The Hindu parsing""" + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all("div", class_="article") + # print maincontent + for content in maincontent: + scripttags = content.find_all( + ["script", "br", "figure", "image", "span"]) + for scripttag in scripttags: + scripttag.extract() + # print content.text + for foundcontent in content.find_all("p"): + Result.append(foundcontent.text) + Result = ''.join(Result) + return (title, Result) diff --git a/news/reader_plugins/huffington_post.py b/news/reader_plugins/huffington_post.py new file mode 100644 index 0000000..4d4be25 --- /dev/null +++ b/news/reader_plugins/huffington_post.py @@ -0,0 +1,8 @@ +from news.reader import Reader + + +class HuffingtonPost(Reader): + """class for Huffington Post parsing""" + def extractor(self, text): + return self._extraction_algo(text, "div", + "content-list-component text") diff --git a/news/reader_plugins/new_york_times.py b/news/reader_plugins/new_york_times.py new file mode 100644 index 0000000..b12576c --- /dev/null +++ b/news/reader_plugins/new_york_times.py @@ -0,0 +1,8 @@ +from news.reader import Reader + + +class NYT(Reader): + """class for New York Times parsing""" + def extractor(self, text): + return self._extraction_algo(text, "p", + "story-body-text story-content") diff --git a/news/reader_plugins/plugin_registration.py b/news/reader_plugins/plugin_registration.py new file mode 100644 index 0000000..d987cf2 --- /dev/null +++ b/news/reader_plugins/plugin_registration.py @@ -0,0 +1,17 @@ +from news.reader_plugins.huffington_post import HuffingtonPost +from news.reader_plugins.new_york_times import NYT +from news.reader_plugins.bbc import BBC +from news.reader_plugins.bloomberg 
import Bloomberg +from news.reader_plugins.guardian import Guardian +from news.reader_plugins.hindu import TheHindu +from news.reader_plugins.times_of_india import TimesOfIndia + +sites = { + 'Huffington Post': HuffingtonPost, + 'New York Times': NYT, + 'BBC': BBC, + 'Bloomberg': Bloomberg, + 'Guardian': Guardian, + 'The Hindu': TheHindu, + 'Times of India': TimesOfIndia +} diff --git a/news/reader_plugins/times_of_india.py b/news/reader_plugins/times_of_india.py new file mode 100644 index 0000000..266a4ad --- /dev/null +++ b/news/reader_plugins/times_of_india.py @@ -0,0 +1,19 @@ +from bs4 import BeautifulSoup + +from news.reader import Reader + + +class TimesOfIndia(Reader): + """class for Times of India parsing""" + def extractor(self, text): + soup = BeautifulSoup(text, 'html.parser') + title = soup.title.string + Result = [] + # print soup + maincontent = soup.find_all("div", class_="Normal") + # print maincontent + for content in maincontent: + # print content.text + Result.append(content.text) + Result = ''.join(Result) + return (title, Result) diff --git a/requirements.txt b/requirements.txt index f3917db..4d4e19c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ PyYAML==3.12 requests==2.18.4 urllib3==1.22 wheel +appdirs==1.4.3 diff --git a/setup.py b/setup.py index 562f227..83b1adf 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages from os import path -from __version__ import __version__ +from news.__version__ import __version__ here = path.abspath(path.dirname(__file__)) with open('README.md') as f: From bc12fae1396ccbc46f5e7b6d6f2b302332108826 Mon Sep 17 00:00:00 2001 From: Tom Faulkner Date: Thu, 22 Feb 2018 21:13:13 -0600 Subject: [PATCH 7/7] updated plugins, more overhaul, a bit of pep8 --- news/config_reader.py | 6 ------ news/extract_main_content.py | 13 +------------ news/news.py | 5 +++-- news/news_pulling.py | 21 ++++++++------------- news/reader_plugins/bloomberg.py | 2 ++ 
news/reader_plugins/guardian.py | 2 ++ news/reader_plugins/hindu.py | 2 ++ news/reader_plugins/huffington_post.py | 2 ++ news/reader_plugins/new_york_times.py | 2 ++ news/reader_plugins/times_of_india.py | 2 ++ 10 files changed, 24 insertions(+), 33 deletions(-) diff --git a/news/config_reader.py b/news/config_reader.py index d68755d..95b6132 100644 --- a/news/config_reader.py +++ b/news/config_reader.py @@ -24,9 +24,3 @@ def __init__(self): self.APIKEY = cfg['api_key'] self.limit = cfg['article_limit'] - - # TODO: Move to using this, and reading it from env, config, defaults - self.user_agent = cfg.get('User-Agent', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' - ' AppleWebKit/537.36 (KHTML, like Gecko ' - 'Chrome/59.0.3071.115 Safari/537.36') diff --git a/news/extract_main_content.py b/news/extract_main_content.py index 576a3da..2b9f1c2 100644 --- a/news/extract_main_content.py +++ b/news/extract_main_content.py @@ -1,17 +1,11 @@ import requests import textwrap -from .config_reader import ConfigurationReader from .reader_plugins.plugin_registration import sites -from .reader import Reader class ExtractMainContent: def __init__(self, source, articleurl): - - self.mapping = {} - for index, website in enumerate(sites): - self.mapping[website] = self.extractorlist[index] self.source = source self.url = articleurl self.textWrap = textwrap.TextWrapper( @@ -24,14 +18,9 @@ def download(self): req = requests.get(self.url, headers=headers) return req.text - # unused, but may be useful in the future - # def AddExtractorList(self, extractor): - # self.extractorlist.append(extractor) - def _extract(self): - self.ExtractStrategy = self.mapping[self.source] text = self.download() - return self.ExtractStrategy.extractor(text) + return sites[self.source]().extractor(text) def beautify(self): title, output = self._extract() diff --git a/news/news.py b/news/news.py index 3f5421a..305cba0 100755 --- a/news/news.py +++ b/news/news.py @@ -16,7 +16,6 @@ class SelectionStatus(Enum): def 
news_sources(): news_sources = tuple(sites.keys()) - print('news sources', news_sources) return news_sources @@ -92,7 +91,9 @@ def main(): source_choice = prompt_for_source(sources) while True: - puller = NewsPulling(sources[source_choice]) + # TODO: This is ugly, but functional. + # Getting the name of the source as used in the API from the plugin. + puller = NewsPulling(sites[sources[source_choice]]().source_name) articles = puller.beautify_articles() status, article_selection = prompt_for_article(max=len(articles)) if status == SelectionStatus.EXIT: diff --git a/news/news_pulling.py b/news/news_pulling.py index b80015e..958582d 100644 --- a/news/news_pulling.py +++ b/news/news_pulling.py @@ -5,11 +5,6 @@ from .config_reader import ConfigurationReader -# TODO: Take reader plugin, read name from it for source in pull_news -# TODO: Rewrite this entire module -# TODO: Rewrite everything -# TODO: make newsapi.org replaceable, especially since there is a v2 api - class NewsPulling: """Pull news from the internet depending on the source specified.""" @@ -22,17 +17,16 @@ def pull_news(self): self.__Limit = config.limit url = 'https://newsapi.org/v1/articles?source=' + \ self.source + '&sortBy=top&apiKey=' + config.APIKEY - print(url) try: req = requests.get(url) print(req) - if(req.status_code == 200): + if req.status_code == 200: return req else: - print( - "There is some issue in connecting to the internet. Please check your firewall or internet") + print("There is some issue in connecting to the internet." 
+ "Please check your firewall or internet") except ConnectionError as e: - print("A connection Attempt failed") + print("A connection attempt failed") print(e.message) sys.exit() @@ -58,8 +52,8 @@ def json_read(self): Article_url = str(article['url'], 'utf-8') DateofPublication = str(article['publishedAt'], 'utf-8') Author = str(article['author'], 'utf-8') - FilteredArticles.append( - [description, title, Article_url, DateofPublication, Author]) + FilteredArticles.append([description, title, Article_url, + DateofPublication, Author]) else: description = article['description'] # print description @@ -68,7 +62,8 @@ def json_read(self): DateofPublication = article['publishedAt'] Author = article['author'] FilteredArticles.append( - [description, title, Article_url, DateofPublication, Author]) + [description, title, Article_url, + DateofPublication, Author]) return FilteredArticles def beautify_articles(self): diff --git a/news/reader_plugins/bloomberg.py b/news/reader_plugins/bloomberg.py index 848060d..893573b 100644 --- a/news/reader_plugins/bloomberg.py +++ b/news/reader_plugins/bloomberg.py @@ -3,5 +3,7 @@ class Bloomberg(Reader): """class for BloomBerg parsing""" + source_name = 'bloomberg' + def extractor(self, text): return self._extraction_algo(text, "div", "body-copy") diff --git a/news/reader_plugins/guardian.py b/news/reader_plugins/guardian.py index a05377f..82f7a1b 100644 --- a/news/reader_plugins/guardian.py +++ b/news/reader_plugins/guardian.py @@ -5,6 +5,8 @@ class Guardian(Reader): """class for Guardian parsing""" + source_name = 'the-guardian-uk' + def extractor(self, text): soup = BeautifulSoup(text, 'html.parser') title = soup.title.string diff --git a/news/reader_plugins/hindu.py b/news/reader_plugins/hindu.py index 6e32f65..e9c3bed 100644 --- a/news/reader_plugins/hindu.py +++ b/news/reader_plugins/hindu.py @@ -5,6 +5,8 @@ class TheHindu(Reader): """class for The Hindu parsing""" + source_name = 'the-hindu' + def extractor(self, text): soup = 
BeautifulSoup(text, 'html.parser') title = soup.title.string diff --git a/news/reader_plugins/huffington_post.py b/news/reader_plugins/huffington_post.py index 4d4be25..6ba7403 100644 --- a/news/reader_plugins/huffington_post.py +++ b/news/reader_plugins/huffington_post.py @@ -3,6 +3,8 @@ class HuffingtonPost(Reader): """class for Huffington Post parsing""" + source_name = 'the-huffington-post' + def extractor(self, text): return self._extraction_algo(text, "div", "content-list-component text") diff --git a/news/reader_plugins/new_york_times.py b/news/reader_plugins/new_york_times.py index b12576c..641c766 100644 --- a/news/reader_plugins/new_york_times.py +++ b/news/reader_plugins/new_york_times.py @@ -2,6 +2,8 @@ class NYT(Reader): + source_name = 'the-new-york-times' + """class for New York Times parsing""" def extractor(self, text): return self._extraction_algo(text, "p", diff --git a/news/reader_plugins/times_of_india.py b/news/reader_plugins/times_of_india.py index 266a4ad..37b11b9 100644 --- a/news/reader_plugins/times_of_india.py +++ b/news/reader_plugins/times_of_india.py @@ -5,6 +5,8 @@ class TimesOfIndia(Reader): """class for Times of India parsing""" + source_name = 'the-times-of-india' + def extractor(self, text): soup = BeautifulSoup(text, 'html.parser') title = soup.title.string