diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/News/ExtractMainContent.py b/News/ExtractMainContent.py index 2677a1e..fc6195d 100644 --- a/News/ExtractMainContent.py +++ b/News/ExtractMainContent.py @@ -3,6 +3,9 @@ Created on Jul 24 21:42:05 2016-2017 @author: Ankit Singh + +Updated on June 6 2018 +@author: Samantha Ryan """ import requests from configReader import ConfigurationReader diff --git a/News/Extractor.py b/News/Extractor.py index 2e95af7..504de43 100644 --- a/News/Extractor.py +++ b/News/Extractor.py @@ -14,6 +14,7 @@ def ExtractionAlgo(self,text): def TextExtractionAlgo(self,text,htmlelement,classname): soup=BeautifulSoup(text,'html.parser') title=soup.title.string + Result=[] #print soup maincontent=soup.find_all(htmlelement, class_=classname) @@ -27,7 +28,8 @@ def TextExtractionAlgo(self,text,htmlelement,classname): Result=''.join(Result) return (title,Result) - + + class HuffingtonPost(Extractor): """class for Huffington Post parsing""" diff --git a/News/Main.py b/News/Main.py index a63d3cf..1165d23 100644 --- a/News/Main.py +++ b/News/Main.py @@ -3,6 +3,9 @@ Created on Jul 24 16:15:11 2016-2017 @author: Ankit Singh + +Updated on June 6 2018 +@author: Samantha Ryan """ from NewsPulling import NewsPulling from configReader import ConfigurationReader @@ -15,12 +18,19 @@ def NewsSources(): NewsSources=ConfigurationReader().GetWebsiteSupported() return NewsSources -def App(): +def NewsTopics(): + NewsTopics=ConfigurationReader().GetTopicsSupported() + return NewsTopics + +def Source(): newsSources=NewsSources() + while True: for i in xrange(len(newsSources)): print ("["+str(i)+"]" +"\t" +newsSources[i]) - print ("Please enter the index of the news source or press 99 to quit") + print ("Please enter the index of the news source") + print( "Press 99 to quit") + print( "Press 66 to return to main menu") try: 
newsSourceNumber=raw_input("News Source Number >>>> ") except ValueError: @@ -28,10 +38,12 @@ def App(): newsSourceNumber=int(newsSourceNumber) if newsSourceNumber==99: sys.exit() + if newsSourceNumber==66: + Choice() if (newsSourceNumber >=len(newsSources)): print ("Please select the index no less than "+ str(len(newsSources))) obj=NewsPulling(newsSources[newsSourceNumber]) - Articles=obj.BeautifyArticles(); + Articles=obj.BeautifySourceArticles(); while True: print ("Do you want to read any story further? If yes, please select the number corresponding to the article") print ("Press 66 to go back to the main menu") @@ -55,14 +67,79 @@ def App(): YesorNo = int(raw_input("Press 1 to save else press 0 to continue >>> ")) if YesorNo == 1: extr.FileSave() - +def Topic(): + newsTopics = NewsTopics() + + while True: + for i in xrange(len(newsTopics)): + print ("[" + str(i) + "]" + "\t" + newsTopics[i]) + print ("Please enter the index of the news topic") + print("Press 99 to quit") + print("Press 66 to return to main menu") + try: + newsTopicNumber = raw_input("News Topic Number >>>> ") + except ValueError: + print ("That is not a valid News Source Number") + newsTopicNumber = int(newsTopicNumber) + if newsTopicNumber == 99: + sys.exit() + if newsTopicNumber == 66: + Choice() + if (newsTopicNumber >= len(newsTopics)): + print ("Please select the index no less than " + str(len(newsTopics))) + obj = NewsPulling(newsTopics[newsTopicNumber]) + Articles = obj.BeautifyTopicArticles(); + while True: + print ( + "Do you want to read any story further? 
If yes, please select the number corresponding to the article") + print ("Press 66 to go back to the main menu") + print ("Press 99 to quit") + try: + articleNumber = raw_input("Article No >>>> ") + except ValueError: + print ("That is not a valid Article Number") + articleNumber = int(articleNumber) + if articleNumber == 99: + sys.exit() + elif articleNumber == 66: + break + elif (articleNumber >= len(Articles)): + print ("Please select the index no less than " + str(len(Articles))) + # print Articles[articleNumber][2] + else: + extr = ExtractMainContent(newsTopics[newsTopicNumber], Articles[articleNumber][2]) + extr.Beautify() + print ("Do you want to save this article in file") + YesorNo = int(raw_input("Press 1 to save else press 0 to continue >>> ")) + if YesorNo == 1: + extr.FileSave() + + +def Choice(): + print ("How would you like to read your news?\n\n") + print ("[0] By News Source") + print ("[1] By News Topic") + print ("[99] To Quit") + + try: + choice = raw_input(">>>> ") + except ValueError: + print ("That is not a valid Option") + choice = int(choice) + if choice == 99: + sys.exit() + if choice == 0: + Source() + if choice == 1: + Topic() def Main(): sys.stdout = codecs.getwriter('utf8')(sys.stdout) - App() + Choice() + if __name__ == '__main__': Main() diff --git a/News/NewsPulling.py b/News/NewsPulling.py index 0b5e3e5..d4014f7 100644 --- a/News/NewsPulling.py +++ b/News/NewsPulling.py @@ -3,6 +3,9 @@ Created on Jul 24 20:01:34 2016-2017 @author: Ankit Singh + +Updated on June 6 2018 +@author: Samantha Ryan """ import requests @@ -11,14 +14,20 @@ import sys class NewsPulling(object): + + """This class is used to pull news from the internet depending on the source specified """ def __init__(self,newsSource): self.Source=newsSource - - def PullNews(self): Configuration = ConfigurationReader() - self.__APIKey=Configuration.GetAPIKEY() - self.__Limit=Configuration.GetLimit() + self.__APIKey = Configuration.GetAPIKEY() + self.__Limit = 
Configuration.GetLimit() + + + # Pulls news stories based on SOURCE -- self.Source will indicate the correctly formatted + # news source to be included in the URL + def PullNews(self): + url='https://newsapi.org/v1/articles?source='+self.Source+'&sortBy=top&apiKey='+self.__APIKey try: req=requests.get(url) @@ -30,9 +39,30 @@ def PullNews(self): print "A connection Attempt failed" print e.message sys.exit() - - def JsonRead(self): + + + # Pulls news stories based on TOPIC -- self.Source will indicate the correctly formatted + # category to be included in the URL + # Pulling by Topic is limited to US-based news stories + def PullTopics(self): + url='https://newsapi.org/v2/top-headlines?country=us&category='+self.Source+'&apiKey='+self.__APIKey + try: + req = requests.get(url) + if (req.status_code == 200): + return req + else: + print "There is some issue in connecting to the internet. Please check your firewall or internet" + except ConnectionError as e: + print "A connection Attempt failed" + print e.message + sys.exit() + + + + def JsonSourceRead(self): + req=self.PullNews() + # indicate if we need to convert to utf-8 needsconversion = False if req.encoding != 'utf-8': @@ -67,9 +97,48 @@ def JsonRead(self): #jsondict=json.load(req.json()) #print jsondict + + + def JsonTopicRead(self): + + req = self.PullTopics() + + # indicate if we need to convert to utf-8 + needsconversion = False + if req.encoding != 'utf-8': + needsconversion = True + req = req.json() + articles = req['articles'] + noofarticles = len(articles) + maxarticles = min(noofarticles, self.__Limit) + + FilteredArticles = [] + + for i in xrange(maxarticles): + article = articles[i] + # print article + if needsconversion: + description = unicode(article['description'], 'utf-8') + # print description + title = unicode(article['title'], 'utf-8') + Article_url = unicode(article['url'], 'utf-8') + DateofPublication = unicode(article['publishedAt'], 'utf-8') + Author = unicode(article['author'], 'utf-8') + 
FilteredArticles.append([description, title, Article_url, DateofPublication, Author]) + else: + description = article['description'] + # print description + title = article['title'] + Article_url = article['url'] + DateofPublication = article['publishedAt'] + Author = article['author'] + FilteredArticles.append([description, title, Article_url, DateofPublication, Author]) + return FilteredArticles + + - def BeautifyArticles(self): - self.Articles=self.JsonRead() + def BeautifySourceArticles(self): + self.Articles=self.JsonSourceRead() if self.Articles is None or len(self.Articles)==0: print "No articles found" sys.exit() @@ -86,8 +155,26 @@ def BeautifyArticles(self): if self.Articles[i][3] is not None: print "\t"+self.Articles[i][3]+"\n" print "***************************************************************" - return self.Articles - - + return self.Articles + + def BeautifyTopicArticles(self): + self.Articles = self.JsonTopicRead() + if self.Articles is None or len(self.Articles) == 0: + print "No articles found" + sys.exit() + print "=================STORIES==================================" + for i in xrange(len(self.Articles)): + print "[" + str(i) + "]", + # print(sequence,end='') used for python 3.x + if self.Articles[i][1] is not None: + print "\t" + self.Articles[i][1] + if self.Articles[i][0] is not None: + print "\t" + self.Articles[i][0] + if self.Articles[i][4] is not None: + print "\t" + self.Articles[i][4] + if self.Articles[i][3] is not None: + print "\t" + self.Articles[i][3] + "\n" + print "***************************************************************" + return self.Articles diff --git a/News/configReader.py b/News/configReader.py index a8b90c4..b85777f 100644 --- a/News/configReader.py +++ b/News/configReader.py @@ -11,9 +11,10 @@ class ConfigurationReader(): __APIKEY=None __WebsiteSupported=[] + __TopicsSupported=[] __Limit=None def __init__(self): - with open("config.yml", 'r') as ymlfile: + with 
open("/Users/Stormy/PycharmProjects/News-At-Command-Line/config.yml", 'r') as ymlfile: cfg = yaml.load(ymlfile) ConfigurationReader.__APIKEY=cfg['Apikey'] #print ConfigurationReader.__APIKEY @@ -21,6 +22,7 @@ def __init__(self): #print ConfigurationReader.__Limit ConfigurationReader.__WebsiteSupported=cfg['WebsiteSupported'] #print ConfigurationReader.__WebsiteSupported + ConfigurationReader.__TopicsSupported=cfg['TopicsSupported'] @staticmethod def GetAPIKEY(): return ConfigurationReader.__APIKEY @@ -32,5 +34,9 @@ def GetLimit(): @staticmethod def GetWebsiteSupported(): return ConfigurationReader.__WebsiteSupported + + @staticmethod + def GetTopicsSupported(): + return ConfigurationReader.__TopicsSupported \ No newline at end of file diff --git a/README.md b/README.md index 6c24360..74e6032 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Now you don't need to visit each news portal website separately to read their articles. Just use the tool and read the articles without even clicking a single mouse button. +### Updates June 6, 2018: Users can now choose to read news articles based on news source or topic. # Modules Requirements diff --git a/config.yml b/config.yml index 54199a7..85718f2 100644 --- a/config.yml +++ b/config.yml @@ -10,6 +10,8 @@ #Created on Jul 20 11:43:52 2016 #@author: Ankit Singh +#Updated on June 6 2018 +#@author: Samantha Ryan WebsiteSupported: @@ -22,12 +24,21 @@ WebsiteSupported: - the-guardian-uk - the-hindu - the-times-of-india - # - The WashingtonPost - # - TheNewyorkTimes + # - The WashingtonPost + # - TheNewyorkTimes #No of posts to be displayed in the one go Limit: 10 +#Topics Supported Based on US News Sources +TopicsSupported: + - business + - entertainment + - health + - science + - sports + - technology + Apikey: bda5818cc2af461e98330ccdf6fb9cbe \ No newline at end of file