Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions ExtractMainContent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

@author: Ankit Singh
"""
import os
import requests
from configReader import ConfigurationReader
from Extractor import *
import textwrap
import json
import re

class ExtractMainContent(object):
def __init__(self,source,articleurl):
Expand Down Expand Up @@ -50,10 +53,15 @@ def Beautify(self):

def FileSave(self):
# Save the extracted article to disk.
# NOTE(review): this text was captured from a diff view whose +/- markers were
# lost; the three "<title>.txt" lines below look like the removed side of the
# hunk and the JSON code like the added side -- confirm against the real file.
title,output=self.Extract()
# Writes the UTF-8 encoded article text to "<title>.txt" (relative path, so
# it lands in the current working directory).
article_file = open(title+".txt","w+")
article_file.write(output.encode('utf-8'))
article_file.close()




# Payload stored in the JSON file: title, UTF-8 encoded body and source URL.
jsonFile = {
'title': title,
'content': output.encode('utf-8'),
'url': self.url,
}
# Collapse every run of characters that are neither digits nor word
# characters into a single underscore to get a file-system friendly name.
fileName = re.sub('[^\d\w]+', '_', title)
# Target is "<directory of this module>/saved/<fileName>.json".
thisFileName = os.path.dirname(__file__)
thisPathName = os.path.abspath(thisFileName)
filePath = os.path.join(thisPathName, 'saved', fileName)
# NOTE(review): nothing in this method creates the 'saved' directory; open()
# will fail if it does not exist yet -- TODO confirm it is created elsewhere.
with open("%s.json" %filePath,"w+") as article_file:
#article_file.write(output.encode('utf-8'))
json.dump(jsonFile, article_file, indent=4)
20 changes: 18 additions & 2 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from NewsPulling import NewsPulling
from configReader import ConfigurationReader
from ExtractMainContent import ExtractMainContent
from SavedArticle import Article, SavedArticle
import sys
import codecs
import os


def NewsSources():
Expand All @@ -17,19 +19,33 @@ def NewsSources():

def App():
newsSources=NewsSources()
savedArticles=SavedArticle()
while True:
for i in xrange(len(newsSources)):
print ("["+str(i)+"]" +"\t" +newsSources[i])
print ("Please enter the index of the news source or press 99 to quit")
print ("Please enter the index of the news source")
print("Press 88 to check saved articles")
print("Press 99 to quit")
try:
newsSourceNumber=raw_input("News Source Number >>>> ")
except ValueError:
print ("That is not a valid News Source Number")
newsSourceNumber=int(newsSourceNumber)
if newsSourceNumber==99:
sys.exit()
elif newsSourceNumber == 88:
while True:
try:
savedArticles.mainLoop()
except ValueError as e:
print(e)
break
except Exception as e:
print(e)
break
if (newsSourceNumber >=len(newsSources)):
print ("Please select the index no less than "+ str(len(newsSources)))
print ("Please select the index no less than "+ str(len(newsSources)))
continue
obj=NewsPulling(newsSources[newsSourceNumber])
Articles=obj.BeautifyArticles();
while True:
Expand Down
136 changes: 136 additions & 0 deletions SavedArticle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
Created on Dec 5 '17

@author: Andres Rivero
"""
import os
import sys
import json
import textwrap

class Article(object):
    """A single saved article backed by a JSON file on disk.

    The JSON document is parsed once, at construction time. The accessors
    read its 'title', 'content' and 'url' keys and fall back to a default
    when a key is absent.
    """

    def __init__(self, fileName):
        """Load the JSON document stored at ``fileName``.

        fileName: (string) path of the JSON file to read
        """
        self.path = fileName
        with open(fileName) as handle:
            self.data = json.load(handle)
        # Wrapper used by prettyPrint(): tab-indented lines, 100 columns wide.
        self.textWrap = textwrap.TextWrapper(
            width=100,
            initial_indent='\t',
            subsequent_indent='\t',
        )

    def __str__(self):
        """Return the title with every non-ASCII character dropped."""
        title = self.data.get('title', "No Title")
        return title.encode('ascii', 'ignore').decode('ascii')

    def getPath(self):
        """getPath()
        return: (string) path of the file this article was loaded from
        """
        return self.path

    def getTitle(self):
        """getTitle()
        return: (string) title of the article, "" when it has none
        """
        return self.data.get('title', "")

    def getContent(self):
        """getContent()
        return: (string) body of the article, "" when it has none
        """
        return self.data.get('content', "")

    def getUrl(self):
        """getUrl()
        return: (string) url stored with the article, "" when it has none
        """
        return self.data.get('url', "")

    def delete(self):
        """delete()
        Removes the backing file from disk when it is still present.
        """
        print("path is: %s" % self.path)
        if not os.path.exists(self.path):
            return
        print("it exists")
        os.remove(self.path)

    def prettyPrint(self):
        """prettyPrint()
        Prints a title banner, the wrapped content and the url to the terminal.
        """
        banner = "=========================================================================="
        print(banner)
        print("\t" + self.getTitle())
        print(banner)
        wrapped = self.textWrap.fill(self.getContent())
        print(wrapped)
        print("\n\n%s" % self.getUrl())
        print("*********************************************************************************")
        print("\n\n")

class SavedArticle(object):
    """Menu-driven list of the articles stored in the 'saved' folder.

    The folder is ``<directory of this module>/saved``. Every file found
    there is loaded as an ``Article`` and cached in ``savedArticleList`` so
    that menu indices stay stable for the lifetime of the object.
    """

    def __init__(self):
        self.folderPath = os.path.abspath(os.path.join(os.path.dirname(__file__), 'saved'))
        self.savedArticleList = list()

    def loadArticle(self, index):
        """loadArticle(index)
        return: the cached article at position ``index``
        raises: IndexError when ``index`` is out of range
        """
        return self.savedArticleList[index]

    def getFolderPath(self):
        """getFolderPath()
        return: (string) absolute path of the 'saved' folder
        """
        return self.folderPath

    def getSavedArticleList(self):
        """getSavedArticleList()
        Rescans the 'saved' folder and returns the refreshed article list.
        """
        self.savedArticleList = self.fillSavedArticleList()
        return self.savedArticleList

    def printSavedArticleMenu(self):
        """printSavedArticleMenu()
        Rescans the 'saved' folder, then prints one numbered line per article
        followed by the navigation hints.
        """
        self._printMenu(self.getSavedArticleList())

    def _printMenu(self, articleList):
        """Print the numbered menu for an already loaded article list."""
        for i, v in enumerate(articleList):
            print("[%d]\t'%s" % (i, v))
        print("Please enter the index of the news source")
        print("Press 66 to go back")
        print("Press 99 to quit")

    def fillSavedArticleList(self):
        """fillSavedArticleList()
        Creates the 'saved' folder when it is missing, then appends an
        Article for every file in it that is not in the list yet.

        return: the (updated) list of cached articles
        """
        if not os.path.exists(self.folderPath):
            os.makedirs(self.folderPath)
        # Sorted so that newly discovered files are appended in a
        # deterministic order; os.listdir() gives no ordering guarantee.
        fileNames = sorted(os.listdir(self.folderPath))
        if not fileNames:
            print("\n\t\tEmpty folder\n")
        knownPaths = set(article.getPath() for article in self.savedArticleList)
        for name in fileNames:
            filePath = os.path.join(self.folderPath, name)
            # Skip before constructing the Article: building it parses the
            # whole JSON file, which is wasted work for already cached entries.
            if filePath in knownPaths:
                continue
            self.savedArticleList.append(Article(filePath))
            knownPaths.add(filePath)
        return self.savedArticleList

    def mainLoop(self):
        """mainLoop()
        Prints the saved-article menu, reads the user's choice and either
        quits (99), goes back to the main menu (66) or prints the selected
        article and offers to delete it.

        raises: ValueError when the input is not a number or not a valid index
        raises: Exception when the user asks to go back to the main menu
        raises: SystemExit when the user asks to quit
        """
        # Scan the folder once and reuse the result for both the menu and the
        # lookup, so every file is parsed (and "Empty folder" printed) once.
        articleList = self.getSavedArticleList()
        self._printMenu(articleList)

        savedSourceNumber = raw_input("Saved Source Number >>>> ")
        if not savedSourceNumber.isdigit():
            raise ValueError("value '%s' cannot be converted to integer" % savedSourceNumber)
        savedSourceNumber = int(savedSourceNumber)
        # NOTE: 66 and 99 are reserved for navigation, so articles at those
        # indices cannot be selected from this menu.
        if savedSourceNumber == 99:
            sys.exit()
        if savedSourceNumber == 66:
            raise Exception("going to back to main menu")
        if not 0 <= savedSourceNumber < len(articleList):
            raise ValueError("value '%d' is not a valid saved source number" % savedSourceNumber)

        article = articleList[savedSourceNumber]
        article.prettyPrint()
        answer = raw_input("Press 1 to delete article else press 0 to continue >>> ")
        # Anything that is not the number 1 (including an empty line or a
        # stray letter) means "continue" instead of raising ValueError.
        try:
            wantsDelete = int(answer) == 1
        except ValueError:
            wantsDelete = False
        if wantsDelete:
            self.savedArticleList.remove(article)
            article.delete()