From be935653835fa256964ad70ba5711bd29845febe Mon Sep 17 00:00:00 2001 From: Andres Rivero Date: Fri, 8 Dec 2017 11:05:44 -0800 Subject: [PATCH] added saved articles to be displayed if present, with possibility to delete them after reading them. changed the format in which the files are saved to json --- ExtractMainContent.py | 22 ++++--- Main.py | 20 ++++++- SavedArticle.py | 136 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 9 deletions(-) create mode 100644 SavedArticle.py diff --git a/ExtractMainContent.py b/ExtractMainContent.py index 2677a1e..c9dd791 100644 --- a/ExtractMainContent.py +++ b/ExtractMainContent.py @@ -4,10 +4,13 @@ @author: Ankit Singh """ +import os import requests from configReader import ConfigurationReader from Extractor import * import textwrap +import json +import re class ExtractMainContent(object): def __init__(self,source,articleurl): @@ -50,10 +53,15 @@ def Beautify(self): def FileSave(self): title,output=self.Extract() - article_file = open(title+".txt","w+") - article_file.write(output.encode('utf-8')) - article_file.close() - - - - + jsonFile = { + 'title': title, + 'content': output.encode('utf-8'), + 'url': self.url, + } + fileName = re.sub('[^\d\w]+', '_', title) + thisFileName = os.path.dirname(__file__) + thisPathName = os.path.abspath(thisFileName) + filePath = os.path.join(thisPathName, 'saved', fileName) + with open("%s.json" %filePath,"w+") as article_file: + #article_file.write(output.encode('utf-8')) + json.dump(jsonFile, article_file, indent=4) \ No newline at end of file diff --git a/Main.py b/Main.py index bea51da..43b34fa 100644 --- a/Main.py +++ b/Main.py @@ -7,8 +7,10 @@ from NewsPulling import NewsPulling from configReader import ConfigurationReader from ExtractMainContent import ExtractMainContent +from SavedArticle import Article, SavedArticle import sys import codecs +import os def NewsSources(): @@ -17,10 +19,13 @@ def NewsSources(): def App(): newsSources=NewsSources() + savedArticles=SavedArticle() while True: for i in xrange(len(newsSources)): print ("["+str(i)+"]" +"\t" +newsSources[i]) - print ("Please enter the index of the news source or press 99 to quit") + print ("Please enter the index of the news source") + print("Press 88 to check saved articles") + print("Press 99 to quit") try: newsSourceNumber=raw_input("News Source Number >>>> ") except ValueError: @@ -28,8 +33,19 @@ def App(): newsSourceNumber=int(newsSourceNumber) if newsSourceNumber==99: sys.exit() + elif newsSourceNumber == 88: + while True: + try: + savedArticles.mainLoop() + except ValueError as e: + print(e) + break + except Exception as e: + print(e) + break if (newsSourceNumber >=len(newsSources)): - print ("Please select the index no less than "+ str(len(newsSources))) + print ("Please select the index no less than "+ str(len(newsSources))) + continue obj=NewsPulling(newsSources[newsSourceNumber]) Articles=obj.BeautifyArticles(); while True: diff --git a/SavedArticle.py b/SavedArticle.py new file mode 100644 index 0000000..7cbf1c0 --- /dev/null +++ b/SavedArticle.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +""" +Created on Dec 5 '17 + +@author: Andres Rivero +""" +import os +import sys +import json +import textwrap + +class Article(object): + """ Article defines an article object. + Articles are composed of its title and content. + """ + def __init__(self, fileName): + self.path = fileName + with open(fileName) as f: + self.data = json.load(f) + self.textWrap=textwrap.TextWrapper(initial_indent='\t',subsequent_indent='\t',width=100) + + def __str__(self): + foo = self.data.get('title', "No Title") + bar = foo.encode('ascii', 'ignore').decode('ascii') + return bar + + def getPath(self): + return self.path + + def getTitle(self): + """getTitle() + return: (string) title name of article + """ + return self.data.get('title', "") + + def getContent(self): + """getContent() + return: (string) full content of article (if available) + """ + return self.data.get('content', "") + + def getUrl(self): + """getUrl() + return: (string) saved url for article + """ + return self.data.get('url', "") + + def delete(self): + print "path is: %s" %self.path + if os.path.exists(self.path): + print "it exists" + os.remove(self.path) + + def prettyPrint(self): + """prettyPrint() + uses textwrap to wrap whole content in terminal screen + """ + print "==========================================================================" + print "\t"+self.getTitle() + print "==========================================================================" + print(self.textWrap.fill(self.getContent())) #wrap of the line + print "\n\n%s" %self.getUrl() + print "*********************************************************************************" + print "\n\n" + +class SavedArticle(object): + """SavedArticle defines an object list with articles. + The saved articles are stored under /saved, where path + is the location where the script executes. + """ + def __init__(self): + self.folderPath = os.path.abspath(os.path.join(os.path.dirname(__file__), 'saved')) + self.savedArticleList = list() + + def loadArticle(self, index): + return self.savedArticleList[index] + + def getFolderPath(self): + return self.folderPath + + def getSavedArticleList(self): + self.savedArticleList = self.fillSavedArticleList() + return self.savedArticleList + + def printSavedArticleMenu(self): + articleList = self.getSavedArticleList() + for i, v in enumerate(articleList): + print "[%d]\t'%s" %(i, v) + print("Please enter the index of the news source") + print("Press 66 to go back") + print("Press 99 to quit") + + def fillSavedArticleList(self): + """fillSavedArticle() + Checks the files in /saved folder and appends to savedList + if the file was not already in the list. + """ + if not os.path.exists(self.folderPath): + os.makedirs(self.folderPath) + if len(os.listdir(self.folderPath)) == 0: + print "\n\t\tEmpty folder\n" + for f in os.listdir(self.folderPath): + thisPathName = os.path.abspath(os.path.dirname(__file__)) + filePath = os.path.join(thisPathName, 'saved', f) + article = Article(filePath) + #if article not in self.savedArticleList: + if not any(x for x in self.savedArticleList if x.getPath() == filePath): + self.savedArticleList.append(article) + return self.savedArticleList + + def mainLoop(self): + """mainLoop() + prints the list of articles that are saved, then asks for user input + to select article to read, or go back to main menu. if article is selected + it prints it to the terminal + """ + self.printSavedArticleMenu() + articleList = self.getSavedArticleList() + savedSourceNumber=raw_input("Saved Source Number >>>> ") + if not savedSourceNumber.isdigit(): + raise ValueError("value '%s' cannot be converted to integer" %savedSourceNumber) + savedSourceNumber = int(savedSourceNumber) + if savedSourceNumber == 99: + sys.exit() + elif savedSourceNumber == 66: + raise Exception("going to back to main menu") + + if savedSourceNumber < 0 or savedSourceNumber > len(articleList) - 1: + raise ValueError("value '%d' is not a valid saved source number" %savedSourceNumber) + else: + article = articleList[savedSourceNumber] + article.prettyPrint() + YesorNo = int(raw_input("Press 1 to delete article else press 0 to continue >>> ")) + if YesorNo == 1: + del self.savedArticleList[self.savedArticleList.index(article)] + article.delete() \ No newline at end of file