From e24ea66cb0548c6e1126694efbf0f6bd6d489ea5 Mon Sep 17 00:00:00 2001
From: sachet-mittal <sachet.mittal@gmail.com>
Date: Tue, 3 Oct 2017 21:22:00 -0700
Subject: [PATCH] make code flake8 compliant

Remaining E501 (line too long) warnings after this change:
$ flake8 *.py
ExtractMainContent.py:34:80: E501 line too long (144 > 79 characters)
ExtractMainContent.py:48:80: E501 line too long (90 > 79 characters)
ExtractMainContent.py:50:80: E501 line too long (90 > 79 characters)
ExtractMainContent.py:52:80: E501 line too long (97 > 79 characters)
ExtractMainContent.py:55:80: E501 line too long (131 > 79 characters)
ExtractMainContent.py:57:80: E501 line too long (101 > 79 characters)
Extractor.py:87:80: E501 line too long (84 > 79 characters)
Main.py:40:80: E501 line too long (120 > 79 characters)
Main.py:59:80: E501 line too long (89 > 79 characters)
NewsPulling.py:15:80: E501 line too long (94 > 79 characters)
NewsPulling.py:31:80: E501 line too long (113 > 79 characters)
---
 ExtractMainContent.py |  63 +++++++++--------
 Extractor.py          | 161 +++++++++++++++++++++++-------------------
 Main.py               |  64 +++++++++--------
 NewsPulling.py        | 101 +++++++++++++-------------
 configReader.py       |  30 ++++----
 5 files changed, 221 insertions(+), 198 deletions(-)

diff --git a/ExtractMainContent.py b/ExtractMainContent.py
index 2677a1e..0eead5e 100644
--- a/ExtractMainContent.py
+++ b/ExtractMainContent.py
@@ -6,40 +6,49 @@
 """
 import requests
 from configReader import ConfigurationReader
-from Extractor import *
+import Extractor
 import textwrap
 
+
 class ExtractMainContent(object):
-    def __init__(self,source,articleurl):
-        self.extractorlist=[HuffingtonPost(),NYT(),BBC(),BloomBerg(),Guardian(),TheHindu(),TimesOfIndia()]
-        websites=ConfigurationReader().GetWebsiteSupported()
-        self.Mapping={}
-        for index,website in enumerate(websites):
-            self.Mapping[website]=self.extractorlist[index]
-        self.Source=source
-        self.url=articleurl
-        self.textWrap=textwrap.TextWrapper(initial_indent='\t',subsequent_indent='\t',width=100)
-        
-        
+    def __init__(self, source, articleurl):
+        self.extractorlist = [
+            Extractor.HuffingtonPost(),
+            Extractor.NYT(),
+            Extractor.BBC(),
+            Extractor.BloomBerg(),
+            Extractor.Guardian(),
+            Extractor.TheHindu(),
+            Extractor.TimesOfIndia()]
+        websites = ConfigurationReader().GetWebsiteSupported()
+        self.Mapping = {}
+        for index, website in enumerate(websites):
+            self.Mapping[website] = self.extractorlist[index]
+        self.Source = source
+        self.url = articleurl
+        self.textWrap = textwrap.TextWrapper(
+            initial_indent='\t', subsequent_indent='\t', width=100)
+
     def DownloadContent(self):
-        headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
-        req=requests.get(self.url,headers=headers)
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
+        req = requests.get(self.url, headers=headers)
         return req.text
-    
-    def AddExtractorList(self,extractor):
+
+    def AddExtractorList(self, extractor):
         self.extractorlist.append(extractor)
-    
+
     def Extract(self):
-        self.ExtractStrategy=self.Mapping[self.Source]
-        text=self.DownloadContent()
+        self.ExtractStrategy = self.Mapping[self.Source]
+        text = self.DownloadContent()
         return self.ExtractStrategy.ExtractionAlgo(text)
-    
+
     def Beautify(self):
-        title,output=self.Extract()            
+        title, output = self.Extract()
         print "=========================================================================="
-        print "\t"+title
+        print "\t" + title
         print "=========================================================================="
-        print (self.textWrap.fill(output)) #wrap of the line
+        print (self.textWrap.fill(output))  # wrap of the line
         print "*********************************************************************************"
         print "\n\n"
         if len(output) == 0:
@@ -49,11 +58,7 @@ def Beautify(self):
             print "\n\n"
 
     def FileSave(self):
-        title,output=self.Extract()
-        article_file = open(title+".txt","w+")
+        title, output = self.Extract()
+        article_file = open(title + ".txt", "w+")
         article_file.write(output.encode('utf-8'))
         article_file.close()
-        
-    
-    
-    
diff --git a/Extractor.py b/Extractor.py
index 2e95af7..7036c48 100644
--- a/Extractor.py
+++ b/Extractor.py
@@ -6,122 +6,137 @@
 """
 from bs4 import BeautifulSoup
 
+
 class Extractor(object):
-    
-    def ExtractionAlgo(self,text):
+
+    def ExtractionAlgo(self, text):
         pass
-    
-    def TextExtractionAlgo(self,text,htmlelement,classname):
-        soup=BeautifulSoup(text,'html.parser')
-        title=soup.title.string
-        Result=[]
-        #print soup
-        maincontent=soup.find_all(htmlelement, class_=classname)
-        #print maincontent
+
+    def TextExtractionAlgo(self, text, htmlelement, classname):
+        soup = BeautifulSoup(text, 'html.parser')
+        title = soup.title.string
+        Result = []
+        # print soup
+        maincontent = soup.find_all(htmlelement, class_=classname)
+        # print maincontent
         for content in maincontent:
-            scripttags=content.find_all(["script","br","figure","image"])
+            scripttags = content.find_all(["script", "br", "figure", "image"])
             for scripttag in scripttags:
                 scripttag.extract()
-            #print content.text
+            # print content.text
             Result.append(content.text)
-        Result=''.join(Result)
-        return (title,Result)
-    
-    
-    
+        Result = ''.join(Result)
+        return (title, Result)
+
+
 class HuffingtonPost(Extractor):
     """class for Huffington Post parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-    def ExtractionAlgo(self,text):
-        return Extractor.TextExtractionAlgo(self,text,"div","content-list-component text")
-        
-    
+
+    def ExtractionAlgo(self, text):
+        return Extractor.TextExtractionAlgo(
+            self, text, "div", "content-list-component text")
+
+
 class NYT(Extractor):
     """class for New York Times parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-    def ExtractionAlgo(self,text):
-        return Extractor.TextExtractionAlgo(self,text,"p","story-body-text story-content")
-    
-    
+
+    def ExtractionAlgo(self, text):
+        return Extractor.TextExtractionAlgo(
+            self, text, "p", "story-body-text story-content")
+
+
 class BBC(Extractor):
     """class for BBC News parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-        
-    def ExtractionAlgo(self,text):
-        return Extractor.TextExtractionAlgo(self,text,"div","story-body__inner")
-        
-    
+
+    def ExtractionAlgo(self, text):
+        return Extractor.TextExtractionAlgo(
+            self, text, "div", "story-body__inner")
+
+
 class BloomBerg(Extractor):
     """class for BloomBerg parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-        
-    def ExtractionAlgo(self,text):
-        return Extractor.TextExtractionAlgo(self,text,"div","body-copy")
-    
+
+    def ExtractionAlgo(self, text):
+        return Extractor.TextExtractionAlgo(self, text, "div", "body-copy")
+
+
 class Guardian(Extractor):
     """class for Guardian parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-        
-    def ExtractionAlgo(self,text):
-        soup=BeautifulSoup(text,'html.parser')
-        title=soup.title.string
-        Result=[]
-        #print soup
-        maincontent=soup.find_all("div", class_="content__article-body from-content-api js-article__body")
-        #print maincontent
+
+    def ExtractionAlgo(self, text):
+        soup = BeautifulSoup(text, 'html.parser')
+        title = soup.title.string
+        Result = []
+        # print soup
+        maincontent = soup.find_all(
+            "div", class_="content__article-body from-content-api js-article__body")
+        # print maincontent
         for content in maincontent:
-            scripttags=content.find_all(["script","br","figure","image"])
+            scripttags = content.find_all(["script", "br", "figure", "image"])
             for scripttag in scripttags:
                 scripttag.extract()
-            #print content.text
+            # print content.text
             for foundcontent in content.find_all("p"):
                 Result.append(foundcontent.text)
-        Result=''.join(Result)
-        return (title,Result)
-    
+        Result = ''.join(Result)
+        return (title, Result)
+
+
 class TheHindu(Extractor):
     """class for BloomBerg parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-        
-    def ExtractionAlgo(self,text):
-        soup=BeautifulSoup(text,'html.parser')
-        title=soup.title.string
-        Result=[]
-        #print soup
-        maincontent=soup.find_all("div", class_="article")
-        #print maincontent
+
+    def ExtractionAlgo(self, text):
+        soup = BeautifulSoup(text, 'html.parser')
+        title = soup.title.string
+        Result = []
+        # print soup
+        maincontent = soup.find_all("div", class_="article")
+        # print maincontent
         for content in maincontent:
-            scripttags=content.find_all(["script","br","figure","image","span"])
+            scripttags = content.find_all(
+                ["script", "br", "figure", "image", "span"])
             for scripttag in scripttags:
                 scripttag.extract()
-            #print content.text
+            # print content.text
             for foundcontent in content.find_all("p"):
                 Result.append(foundcontent.text)
-        Result=''.join(Result)
-        return (title,Result)
-    
+        Result = ''.join(Result)
+        return (title, Result)
+
+
 class TimesOfIndia(Extractor):
     """class for BloomBerg parsing"""
+
     def __init__(self):
         Extractor.__init__(self)
-        
-    def ExtractionAlgo(self,text):
-        soup=BeautifulSoup(text,'html.parser')
-        title=soup.title.string
-        Result=[]
-        #print soup
-        maincontent=soup.find_all("div", class_="Normal")
-        #print maincontent
+
+    def ExtractionAlgo(self, text):
+        soup = BeautifulSoup(text, 'html.parser')
+        title = soup.title.string
+        Result = []
+        # print soup
+        maincontent = soup.find_all("div", class_="Normal")
+        # print maincontent
         for content in maincontent:
-            #print content.text
+            # print content.text
             Result.append(content.text)
-        Result=''.join(Result)
-        return (title,Result)      
-
-        
\ No newline at end of file
+        Result = ''.join(Result)
+        return (title, Result)
diff --git a/Main.py b/Main.py
index bea51da..c7a07ae 100644
--- a/Main.py
+++ b/Main.py
@@ -10,53 +10,57 @@
 import sys
 import codecs
 
+EXIT = 99
+BACK = 66
+
 
 def NewsSources():
-    NewsSources=ConfigurationReader().GetWebsiteSupported()
+    NewsSources = ConfigurationReader().GetWebsiteSupported()
     return NewsSources
 
+
 def App():
-    newsSources=NewsSources()
+    newsSources = NewsSources()
     while True:
-        for i in xrange(len(newsSources)):
-            print ("["+str(i)+"]" +"\t" +newsSources[i])
-        print ("Please enter the index of the news source or press 99 to quit")
+        for i, newsSource in enumerate(newsSources):
+            print "[%s] \t %s " % (i, newsSource)
+        print "Please enter the index of the news source or press 99 to quit"
         try:
-            newsSourceNumber=raw_input("News Source Number >>>> ")
-        except ValueError:  
-            print ("That is not a valid News Source Number")
-        newsSourceNumber=int(newsSourceNumber)
-        if newsSourceNumber==99:
+            newsSourceNumber = raw_input("News Source Number >>>> ")
+        except ValueError:
+            print "That is not a valid News Source Number"
+        newsSourceNumber = int(newsSourceNumber)
+        if newsSourceNumber == EXIT:
             sys.exit()
-        if (newsSourceNumber >=len(newsSources)):
-            print ("Please select the index no less than "+ str(len(newsSources)))  
-        obj=NewsPulling(newsSources[newsSourceNumber])
-        Articles=obj.BeautifyArticles();   
+        if (newsSourceNumber >= len(newsSources)):
+            print "Please select the index no less than %s" % len(newsSources)
+        obj = NewsPulling(newsSources[newsSourceNumber])
+        Articles = obj.BeautifyArticles()
         while True:
-            print ("Do you want to read any story further? If yes, please select the number corresponding to the article")
-            print ("Press 66 to go back to the main menu")
-            print ("Press 99 to quit")
+            print "Do you want to read any story further? If yes, please select the number corresponding to the article"
+            print "Press 66 to go back to the main menu"
+            print "Press 99 to quit"
             try:
-                articleNumber=raw_input("Article No >>>> ")
+                articleNumber = int(raw_input("Article No >>>> "))
             except ValueError:
-                print ("That is not a valid Article Number")
-            articleNumber=int(articleNumber)
-            if articleNumber==99 :
+                print("That is not a valid Article Number")
+                continue
+            if articleNumber == EXIT:
                 sys.exit()
-            elif articleNumber==66 :
+            elif articleNumber == BACK:
                 break
-            elif (articleNumber >= len(Articles)):
-                print ("Please select the index no less than "+ str(len(Articles)))
-            #print Articles[articleNumber][2]
+            elif articleNumber >= len(Articles):
+                print "Please select the index no less than %s" % len(Articles)
             else:
-                extr=ExtractMainContent(newsSources[newsSourceNumber],Articles[articleNumber][2])
+                extr = ExtractMainContent(newsSources[newsSourceNumber],
+                                          Articles[articleNumber][2])
                 extr.Beautify()
-                print ("Do you want to save this article in file")
+                print("Do you want to save this article in file")
                 YesorNo = int(raw_input("Press 1 to save else press 0 to continue >>> "))
                 if YesorNo == 1:
                     extr.FileSave()
-                    
 
-if __name__== "__main__":
+
+if __name__ == "__main__":
     sys.stdout = codecs.getwriter('utf8')(sys.stdout)
-    App();
+    App()
diff --git a/NewsPulling.py b/NewsPulling.py
index 0b5e3e5..fc5c852 100644
--- a/NewsPulling.py
+++ b/NewsPulling.py
@@ -10,19 +10,22 @@
 from requests import ConnectionError
 import sys
 
+
 class NewsPulling(object):
     """This class is used to pull news from the internet depending on the source specified """
-    def __init__(self,newsSource):
-        self.Source=newsSource
-        
+
+    def __init__(self, newsSource):
+        self.Source = newsSource
+
     def PullNews(self):
         Configuration = ConfigurationReader()
-        self.__APIKey=Configuration.GetAPIKEY()
-        self.__Limit=Configuration.GetLimit()
-        url='https://newsapi.org/v1/articles?source='+self.Source+'&sortBy=top&apiKey='+self.__APIKey
+        self.__APIKey = Configuration.GetAPIKEY()
+        self.__Limit = Configuration.GetLimit()
+        url = 'https://newsapi.org/v1/articles?source=' + \
+            self.Source + '&sortBy=top&apiKey=' + self.__APIKey
         try:
-            req=requests.get(url)
-            if(req.status_code==200):
+            req = requests.get(url)
+            if(req.status_code == 200):
                 return req
             else:
                 print "There is some issue in connecting to the internet. Please check your firewall or internet"
@@ -30,64 +33,62 @@ def PullNews(self):
             print "A connection Attempt failed"
             print e.message
             sys.exit()
-    
+
     def JsonRead(self):
-        req=self.PullNews()
+        req = self.PullNews()
         # indicate if we need to convert to utf-8
         needsconversion = False
         if req.encoding != 'utf-8':
             needsconversion = True
-        req=req.json()
-        articles=req['articles']
-        noofarticles=len(articles)
-        maxarticles=min(noofarticles,self.__Limit)
-        
-        FilteredArticles=[]
-        
+        req = req.json()
+        articles = req['articles']
+        noofarticles = len(articles)
+        maxarticles = min(noofarticles, self.__Limit)
+
+        FilteredArticles = []
+
         for i in xrange(maxarticles):
-            article=articles[i]
-            #print article
+            article = articles[i]
             if needsconversion:
-                description=unicode(article['description'], 'utf-8')
-                #print description
-                title=unicode(article['title'], 'utf-8')
-                Article_url=unicode(article['url'], 'utf-8')
-                DateofPublication=unicode(article['publishedAt'], 'utf-8')
-                Author=unicode(article['author'], 'utf-8')
-                FilteredArticles.append([description,title,Article_url,DateofPublication,Author])
+                description = unicode(article['description'], 'utf-8')
+                title = unicode(article['title'], 'utf-8')
+                Article_url = unicode(article['url'], 'utf-8')
+                DateofPublication = unicode(article['publishedAt'], 'utf-8')
+                Author = unicode(article['author'], 'utf-8')
+                FilteredArticles.append([description,
+                                         title,
+                                         Article_url,
+                                         DateofPublication,
+                                         Author])
             else:
-                description=article['description']
-                #print description
-                title=article['title']
-                Article_url=article['url']
-                DateofPublication=article['publishedAt']
-                Author=article['author']
-                FilteredArticles.append([description,title,Article_url,DateofPublication,Author])
+                description = article['description']
+                title = article['title']
+                Article_url = article['url']
+                DateofPublication = article['publishedAt']
+                Author = article['author']
+                FilteredArticles.append([description,
+                                         title,
+                                         Article_url,
+                                         DateofPublication,
+                                         Author])
         return FilteredArticles
-            
-        #jsondict=json.load(req.json())
-        #print jsondict
-        
+
     def BeautifyArticles(self):
-        self.Articles=self.JsonRead()
-        if self.Articles is None or len(self.Articles)==0:
+        self.Articles = self.JsonRead()
+        if self.Articles is None or len(self.Articles) == 0:
             print "No articles found"
             sys.exit()
         print "=================STORIES=================================="
         for i in xrange(len(self.Articles)):
-            print "[" +str(i) +"]",
-           # print(sequence,end='') used for python 3.x
+            print "[" + str(i) + "]",
+            # print(sequence,end='') used for python 3.x
             if self.Articles[i][1] is not None:
-                print "\t"+self.Articles[i][1]
+                print "\t" + self.Articles[i][1]
             if self.Articles[i][0] is not None:
-                print "\t"+self.Articles[i][0]
+                print "\t" + self.Articles[i][0]
             if self.Articles[i][4] is not None:
-                print "\t"+self.Articles[i][4]
+                print "\t" + self.Articles[i][4]
             if self.Articles[i][3] is not None:
-                print "\t"+self.Articles[i][3]+"\n"
+                print "\t" + self.Articles[i][3] + "\n"
         print "***************************************************************"
-        return self.Articles 
-    
-        
-        
-        
+        return self.Articles
diff --git a/configReader.py b/configReader.py
index a8b90c4..8bf5368 100644
--- a/configReader.py
+++ b/configReader.py
@@ -6,31 +6,29 @@
 """
 import yaml
 
-##to do
-#implement singleton pattern here
+
+# to do
+# implement singleton pattern here
 class ConfigurationReader():
-    __APIKEY=None
-    __WebsiteSupported=[]
-    __Limit=None
+    __APIKEY = None
+    __WebsiteSupported = []
+    __Limit = None
+
     def __init__(self):
         with open("config.yml", 'r') as ymlfile:
             cfg = yaml.load(ymlfile)
-        ConfigurationReader.__APIKEY=cfg['Apikey']
-        #print ConfigurationReader.__APIKEY
-        ConfigurationReader.__Limit=cfg['Limit']
-        #print ConfigurationReader.__Limit
-        ConfigurationReader.__WebsiteSupported=cfg['WebsiteSupported']
-        #print ConfigurationReader.__WebsiteSupported
-    @staticmethod    
+        ConfigurationReader.__APIKEY = cfg['Apikey']
+        ConfigurationReader.__Limit = cfg['Limit']
+        ConfigurationReader.__WebsiteSupported = cfg['WebsiteSupported']
+
+    @staticmethod
     def GetAPIKEY():
         return ConfigurationReader.__APIKEY
-    
+
     @staticmethod
     def GetLimit():
         return ConfigurationReader.__Limit
-    
+
     @staticmethod
     def GetWebsiteSupported():
         return ConfigurationReader.__WebsiteSupported
-    
-    
\ No newline at end of file