Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions News/ExtractMainContent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
Created on Jul 24 21:42:05 2016-2017

@author: Ankit Singh

Updated on June 6 2018
@author: Samantha Ryan
"""
import requests
from configReader import ConfigurationReader
Expand Down
4 changes: 3 additions & 1 deletion News/Extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def ExtractionAlgo(self,text):
def TextExtractionAlgo(self,text,htmlelement,classname):
soup=BeautifulSoup(text,'html.parser')
title=soup.title.string

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra line

Result=[]
#print soup
maincontent=soup.find_all(htmlelement, class_=classname)
Expand All @@ -27,7 +28,8 @@ def TextExtractionAlgo(self,text,htmlelement,classname):
Result=''.join(Result)
return (title,Result)





class HuffingtonPost(Extractor):
"""class for Huffington Post parsing"""
Expand Down
87 changes: 82 additions & 5 deletions News/Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
Created on Jul 24 16:15:11 2016-2017

@author: Ankit Singh

Updated on June 6 2018
@author: Samantha Ryan
"""
from NewsPulling import NewsPulling
from configReader import ConfigurationReader
Expand All @@ -15,23 +18,32 @@ def NewsSources():
NewsSources=ConfigurationReader().GetWebsiteSupported()
return NewsSources

def App():
def NewsTopics():
    """Return the list of news topics declared in the configuration file."""
    return ConfigurationReader().GetTopicsSupported()

def Source():
newsSources=NewsSources()

while True:
for i in xrange(len(newsSources)):
print ("["+str(i)+"]" +"\t" +newsSources[i])
print ("Please enter the index of the news source or press 99 to quit")
print ("Please enter the index of the news source")
print( "Press 99 to quit")
print( "Press 66 to return to main menu")
try:
newsSourceNumber=raw_input("News Source Number >>>> ")
except ValueError:
print ("That is not a valid News Source Number")
newsSourceNumber=int(newsSourceNumber)
if newsSourceNumber==99:
sys.exit()
if newsSourceNumber==66:
Choice()
if (newsSourceNumber >=len(newsSources)):
print ("Please select the index no less than "+ str(len(newsSources)))
obj=NewsPulling(newsSources[newsSourceNumber])
Articles=obj.BeautifyArticles();
Articles=obj.BeautifySourceArticles();
while True:
print ("Do you want to read any story further? If yes, please select the number corresponding to the article")
print ("Press 66 to go back to the main menu")
Expand All @@ -55,14 +67,79 @@ def App():
YesorNo = int(raw_input("Press 1 to save else press 0 to continue >>> "))
if YesorNo == 1:
extr.FileSave()


def _ReadMenuNumber(prompt, errorMessage):
    """Prompt once and return the typed integer, or None when it is not a number."""
    try:
        # int() must sit inside the try: raw_input itself never raises ValueError.
        return int(raw_input(prompt))
    except ValueError:
        print (errorMessage)
        return None


def Topic():
    """Interactive loop for reading news by topic.

    Lists the configured topics, asks for one, prints that topic's articles
    and then lets the user open (and optionally save) individual articles.
    Entering 99 quits the program; 66 at the topic prompt returns to the
    main menu, and 66 at the article prompt returns to the topic list.
    Invalid or out-of-range input re-prompts instead of crashing.
    """
    newsTopics = NewsTopics()

    while True:
        for i, topic in enumerate(newsTopics):
            print ("[" + str(i) + "]" + "\t" + topic)
        print ("Please enter the index of the news topic")
        print ("Press 99 to quit")
        print ("Press 66 to return to main menu")
        newsTopicNumber = _ReadMenuNumber("News Topic Number >>>> ",
                                          "That is not a valid News Topic Number")
        if newsTopicNumber is None:
            continue
        if newsTopicNumber == 99:
            sys.exit()
        if newsTopicNumber == 66:
            Choice()
            # Do not fall through and use 66 as a topic index.
            return
        if not 0 <= newsTopicNumber < len(newsTopics):
            print ("Please select the index no less than " + str(len(newsTopics)))
            continue

        obj = NewsPulling(newsTopics[newsTopicNumber])
        Articles = obj.BeautifyTopicArticles()
        while True:
            print ("Do you want to read any story further? If yes, please select the number corresponding to the article")
            print ("Press 66 to go back to the main menu")
            print ("Press 99 to quit")
            articleNumber = _ReadMenuNumber("Article No >>>> ",
                                            "That is not a valid Article Number")
            if articleNumber is None:
                continue
            if articleNumber == 99:
                sys.exit()
            elif articleNumber == 66:
                break
            elif not 0 <= articleNumber < len(Articles):
                print ("Please select the index no less than " + str(len(Articles)))
            else:
                # Index 2 of an article entry is its URL.
                extr = ExtractMainContent(newsTopics[newsTopicNumber], Articles[articleNumber][2])
                extr.Beautify()
                print ("Do you want to save this article in file")
                YesorNo = _ReadMenuNumber("Press 1 to save else press 0 to continue >>> ",
                                          "That is not a valid Option")
                if YesorNo == 1:
                    extr.FileSave()


def Choice():
    """Show the main menu and dispatch to the chosen reading mode.

    0 browses by news source, 1 browses by news topic and 99 quits.
    Anything else (including non-numeric input) prints an error and shows
    the menu again instead of crashing or silently returning.
    """
    while True:
        print ("How would you like to read your news?\n\n")
        print ("[0] By News Source")
        print ("[1] By News Topic")
        print ("[99] To Quit")

        try:
            # The conversion is what can fail, so it belongs inside the try.
            choice = int(raw_input(">>>> "))
        except ValueError:
            print ("That is not a valid Option")
            continue

        if choice == 99:
            sys.exit()
        if choice == 0:
            Source()
            return
        if choice == 1:
            Topic()
            return
        print ("That is not a valid Option")



def Main():
    """Program entry point: set up UTF-8 console output and open the main menu."""
    # Wrap stdout in a UTF-8 stream writer so unicode text is encoded when printed.
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    # The former App() entry point was split into Source()/Topic(), reached via Choice().
    Choice()


if __name__ == '__main__':
    Main()
109 changes: 98 additions & 11 deletions News/NewsPulling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
Created on Jul 24 20:01:34 2016-2017

@author: Ankit Singh

Updated on June 6 2018
@author: Samantha Ryan
"""

import requests
Expand All @@ -11,14 +14,20 @@
import sys

class NewsPulling(object):

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extraline


"""This class is used to pull news from the internet depending on the source specified """
def __init__(self,newsSource):
    """Remember the requested source/topic and load the API settings.

    newsSource is stored as self.Source and later inserted into the request
    URL, either as a news source id or as a topic category.
    """
    self.Source = newsSource

    # Read the configuration once here so every pull method can use the
    # API key and the article limit.
    Configuration = ConfigurationReader()
    self.__APIKey = Configuration.GetAPIKEY()
    self.__Limit = Configuration.GetLimit()


# Pulls news stories based on SOURCE -- self.Source will indicate the correctly formatted
# news source to be included in the URL
def PullNews(self):

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move the URL into a parameter of the class, and pass "stories" or "topics" as an input to the method so that it fetches one or the other accordingly. Alternatively, if possible, define an interface and have separate classes inherit from it, each handling its own use case.

url='https://newsapi.org/v1/articles?source='+self.Source+'&sortBy=top&apiKey='+self.__APIKey
try:
req=requests.get(url)
Expand All @@ -30,9 +39,30 @@ def PullNews(self):
print "A connection Attempt failed"
print e.message
sys.exit()

def JsonRead(self):


# Pulls news stories based on TOPIC -- self.Source will indicate the correctly formatted
# category to be included in the URL
# Pulling by Topic is limited to US-based news stories
def PullTopics(self):
url='https://newsapi.org/v2/top-headlines?country=us&category='+self.Source+'&apiKey='+self.__APIKey
try:
req = requests.get(url)
if (req.status_code == 200):
return req
else:
print "There is some issue in connecting to the internet. Please check your firewall or internet"
except ConnectionError as e:
print "A connection Attempt failed"
print e.message
sys.exit()



def JsonSourceRead(self):

req=self.PullNews()

# indicate if we need to convert to utf-8
needsconversion = False
if req.encoding != 'utf-8':
Expand Down Expand Up @@ -67,9 +97,48 @@ def JsonRead(self):

#jsondict=json.load(req.json())
#print jsondict


def JsonTopicRead(self):
    """Return the topic's articles as a list of five-item lists.

    Each entry is [description, title, url, publishedAt, author], taken from
    the 'articles' array of the JSON response. At most self.__Limit entries
    are returned. Fields may be None when the API supplies null.
    """
    req = self.PullTopics()

    # Only byte strings need decoding when the response is not declared UTF-8.
    needsconversion = req.encoding != 'utf-8'
    articles = req.json()['articles']
    maxarticles = min(len(articles), self.__Limit)

    def _AsText(value):
        # Decode byte strings only: already-decoded text and None (null in
        # the JSON) cannot be passed through a decoder without a TypeError.
        if needsconversion and isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    fields = ('description', 'title', 'url', 'publishedAt', 'author')
    FilteredArticles = []
    for article in articles[:maxarticles]:
        FilteredArticles.append([_AsText(article[name]) for name in fields])
    return FilteredArticles



def BeautifyArticles(self):
self.Articles=self.JsonRead()
def BeautifySourceArticles(self):
self.Articles=self.JsonSourceRead()
if self.Articles is None or len(self.Articles)==0:
print "No articles found"
sys.exit()
Expand All @@ -86,8 +155,26 @@ def BeautifyArticles(self):
if self.Articles[i][3] is not None:
print "\t"+self.Articles[i][3]+"\n"
print "***************************************************************"
return self.Articles


return self.Articles

def BeautifyTopicArticles(self):
self.Articles = self.JsonTopicRead()
if self.Articles is None or len(self.Articles) == 0:
print "No articles found"
sys.exit()
print "=================STORIES=================================="
for i in xrange(len(self.Articles)):
print "[" + str(i) + "]",
# print(sequence,end='') used for python 3.x
if self.Articles[i][1] is not None:
print "\t" + self.Articles[i][1]
if self.Articles[i][0] is not None:
print "\t" + self.Articles[i][0]
if self.Articles[i][4] is not None:
print "\t" + self.Articles[i][4]
if self.Articles[i][3] is not None:
print "\t" + self.Articles[i][3] + "\n"
print "***************************************************************"
return self.Articles


8 changes: 7 additions & 1 deletion News/configReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,18 @@
class ConfigurationReader():
__APIKEY=None
__WebsiteSupported=[]
__TopicsSupported=[]
__Limit=None
def __init__(self):
    """Load config.yml and publish its values as class-level settings.

    The file is looked up relative to the current working directory. The
    'Apikey', 'Limit', 'WebsiteSupported' and 'TopicsSupported' keys are
    required; a missing key raises KeyError.
    """
    # Relative path on purpose: a user-specific absolute path only works on
    # the machine it was written on.
    with open("config.yml", 'r') as ymlfile:
        # safe_load restricts the document to plain YAML types, whereas
        # yaml.load without a Loader can construct arbitrary Python objects.
        cfg = yaml.safe_load(ymlfile)
    ConfigurationReader.__APIKEY = cfg['Apikey']
    ConfigurationReader.__Limit = cfg['Limit']
    ConfigurationReader.__WebsiteSupported = cfg['WebsiteSupported']
    ConfigurationReader.__TopicsSupported = cfg['TopicsSupported']
@staticmethod
def GetAPIKEY():
return ConfigurationReader.__APIKEY
Expand All @@ -32,5 +34,9 @@ def GetLimit():
@staticmethod
def GetWebsiteSupported():
return ConfigurationReader.__WebsiteSupported

@staticmethod
def GetTopicsSupported():
    """Return the class-level list of supported topics (empty until a reader has been constructed)."""
    topics = ConfigurationReader.__TopicsSupported
    return topics


1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Now you don't need to visit each news portal website separately to read their articles. Just use the tool and read the articles without even clicking a single mouse button.

### Updates June 6, 2018: Users can now choose to read news articles based on news source or topic.

# Modules Requirements

Expand Down
15 changes: 13 additions & 2 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#Created on Jul 20 11:43:52 2016

#@author: Ankit Singh
#Updated on June 6 2018
#@author: Samantha Ryan


WebsiteSupported:
Expand All @@ -22,12 +24,21 @@ WebsiteSupported:
- the-guardian-uk
- the-hindu
- the-times-of-india
# - The WashingtonPost
# - TheNewyorkTimes
# - The WashingtonPost
# - TheNewyorkTimes

#No of posts to be displayed in the one go
Limit: 10

#Topics Supported Based on US News Sources
TopicsSupported:
- business
- entertainment
- health
- science
- sports
- technology

Apikey: bda5818cc2af461e98330ccdf6fb9cbe