diff --git a/News/ExtractMainContent.py b/News/ExtractMainContent.py index 2677a1e..3ffb814 100644 --- a/News/ExtractMainContent.py +++ b/News/ExtractMainContent.py @@ -11,6 +11,7 @@ class ExtractMainContent(object): def __init__(self,source,articleurl): + #import pdb; pdb.set_trace(); self.extractorlist=[HuffingtonPost(),NYT(),BBC(),BloomBerg(),Guardian(),TheHindu(),TimesOfIndia()] websites=ConfigurationReader().GetWebsiteSupported() self.Mapping={} @@ -23,7 +24,18 @@ def __init__(self,source,articleurl): def DownloadContent(self): headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'} - req=requests.get(self.url,headers=headers) + Configuration = ConfigurationReader() + self.__ProxyIP=Configuration.GetProxyIP() + self.__ProxyPortNumber=Configuration.GetProxyPortNumber() + proxies = {} + if self.__ProxyIP and self.__ProxyPortNumber: + proxies = { + 'http': "http://{}:{}".format(self.__ProxyIP, + self.__ProxyPortNumber), + 'https': "https://{}:{}".format(self.__ProxyIP, + self.__ProxyPortNumber), + } + req=requests.get(self.url,headers=headers,proxies=proxies) return req.text def AddExtractorList(self,extractor): diff --git a/News/NewsPulling.py b/News/NewsPulling.py index a16aa5d..0472d8f 100644 --- a/News/NewsPulling.py +++ b/News/NewsPulling.py @@ -20,9 +20,19 @@ def PullNews(self): Configuration = ConfigurationReader() self.__APIKey=Configuration.GetAPIKEY() self.__Limit=Configuration.GetLimit() + self.__ProxyIP=Configuration.GetProxyIP() + self.__ProxyPortNumber=Configuration.GetProxyPortNumber() url='https://newsapi.org/v2/top-headlines?sources='+self.Source+'&sortBy=top&apiKey='+self.__APIKey + proxies = {} + if self.__ProxyIP and self.__ProxyPortNumber: + proxies = { + 'http': "http://{}:{}".format(self.__ProxyIP, + self.__ProxyPortNumber), + 'https': "https://{}:{}".format(self.__ProxyIP, + self.__ProxyPortNumber), + } try: - req=requests.get(url) + req=requests.get(url,proxies=proxies) if(req.status_code==200): return req else: diff --git a/News/configReader.py b/News/configReader.py index a8b90c4..172367d 100644 --- a/News/configReader.py +++ b/News/configReader.py @@ -12,6 +12,8 @@ class ConfigurationReader(): __APIKEY=None __WebsiteSupported=[] __Limit=None + __ProxyIP=None + __ProxyPortNumber=None def __init__(self): with open("config.yml", 'r') as ymlfile: cfg = yaml.load(ymlfile) @@ -21,6 +23,10 @@ def __init__(self): #print ConfigurationReader.__Limit ConfigurationReader.__WebsiteSupported=cfg['WebsiteSupported'] #print ConfigurationReader.__WebsiteSupported + ConfigurationReader.__ProxyIP = cfg['ProxyIP'] + # print ConfigurationReader.__ProxyIP + ConfigurationReader.__ProxyPortNumber = cfg['ProxyPortNumber'] + # print ConfigurationReader.__ProxyPortNumber @staticmethod def GetAPIKEY(): return ConfigurationReader.__APIKEY @@ -32,5 +38,13 @@ def GetLimit(): @staticmethod def GetWebsiteSupported(): return ConfigurationReader.__WebsiteSupported + + @staticmethod + def GetProxyIP(): + return ConfigurationReader.__ProxyIP + + @staticmethod + def GetProxyPortNumber(): + return ConfigurationReader.__ProxyPortNumber \ No newline at end of file diff --git a/README.md b/README.md index caf6f22..8c825ed 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ To install the module dependencies before running the application, simply naviga ### To install python setup.py install newsctl + ### If you are accessing the web through a proxy service: + Please add the proxy IP and port number to the config.yml file # Contributing We welcome your contributions. Please feel free to fork the code, play with it, make some patches and send us pull requests. diff --git a/config.yml b/config.yml index f124022..cead5fc 100644 --- a/config.yml +++ b/config.yml @@ -31,4 +31,7 @@ Limit: 10 Apikey: bda5818cc2af461e98330ccdf6fb9cbe +ProxyIP: +ProxyPortNumber: + \ No newline at end of file