From 07c2ce2cf2d1fe7d7e9cec04b1ad14a0bccc871b Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Mon, 8 Jun 2015 19:30:18 +0300 Subject: [PATCH 01/48] Secure requests were tried to implement. Configuration files paths were absolutized. --- InstaBot.py | 90 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/InstaBot.py b/InstaBot.py index 25941c1..af9d0e7 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,29 +1,44 @@ import mechanize, yaml, re, time, sys, pycurl, hmac, urllib from hashlib import sha256 +from os import path WEBSTA_URL = "http://websta.me/" WEBSTA_HASHTAG = WEBSTA_URL + "hot" -INSTAGRAM_API = "https://api.instagram.com/v1/media/" +INSTAGRAM_API = "https://api.instagram.com/v1" USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' -# Function to encode the string with the IP and ID of the picture then like it +def get_signature(endpoint, params, secret): + sig = endpoint + for key in sorted(params.keys()): + sig += '|%s=%s' % (key, params[key]) + return hmac.new(secret, sig, sha256).hexdigest() + def encodeAndRequest(id): - c = pycurl.Curl() - signature = hmac.new(str(profile['CREDENTIALS']['CLIENT_SECRET']), profile['IP'], sha256).hexdigest() - header = '|'.join([profile['IP'], signature]) - header = ["X-Insta-Forwarded-For: " + header] - - url = INSTAGRAM_API + id + "/likes" - c.setopt(c.URL, url) - c.setopt(c.POSTFIELDS, "access_token=" + str(profile['CREDENTIALS']['ACCESS_TOKEN'])) - c.setopt(pycurl.HTTPHEADER, header) - c.perform() - - response = str(c.getinfo(c.HTTP_CODE)) - c.close() - - return response + ''' Function to encode the string with the IP and ID of the picture then like it. ''' + c = pycurl.Curl() + '''signature = hmac.new(str(profile['CREDENTIALS']['CLIENT_SECRET']), profile['IP'], sha256).hexdigest() + header = '|'.join([profile['IP'], signature]) + header = ["X-Insta-Forwarded-For: " + header]''' + endpoint = '/media/' + id + '/likes' + + post_data_dict = { + 'access_token': profile['CREDENTIALS']['ACCESS_TOKEN'], + } + post_data_dict['sig'] = get_signature(endpoint, post_data_dict, profile['CREDENTIALS']['CLIENT_SECRET']) + print post_data_dict, endpoint + post_data = urllib.urlencode(post_data_dict) + + url = INSTAGRAM_API + endpoint + c.setopt(c.URL, url) + c.setopt(c.POSTFIELDS, post_data) + #c.setopt(pycurl.HTTPHEADER, header) + c.perform() + + response = str(c.getinfo(c.HTTP_CODE)) + c.close() + + return response # Function to parse the Top HashTag page and get the current top hashtags def getTopHashTags(br): @@ -32,10 +47,9 @@ def getTopHashTags(br): return topHashtags # Function to read the hashtags from a users file if not wanting to parse the top 100 -def getHashtagsFromFile(): +def getHashtagsFromFile(filename): #your list of hashtags hashtags = [] - filename = 'hashtags.txt' #Hashtag file input f = open(filename) #strips newline character @@ -89,21 +103,23 @@ def like(br, hashtags): print "YOU LIKED " + str(likes) + " photos" if __name__ == "__main__": - - print "=================================" - print " InstaBot " - print " Developed by Marc Laventure " - print "=================================" - print "" - - profile = yaml.safe_load(open("profile.yml", "r")) - br = mechanize.Browser() - br.set_handle_robots(False) - br.set_handle_equiv(False) - br.addheaders = [('User-Agent', USER_AGENT), ('Accept', '*/*')] - - if profile['TOP'] == 1: - hashtags = getTopHashTags(br) - else: - hashtags = getHashtagsFromFile() - like(br, hashtags) + print "=================================" + print " InstaBot " + print " Developed by Marc Laventure " + print "=================================" + print + + directory = path.abspath(path.dirname(__file__)) + profile_filename = path.join(directory, 'profile.yml') + profile = yaml.safe_load(open(profile_filename, "r")) + br = mechanize.Browser() + br.set_handle_robots(False) + br.set_handle_equiv(False) + br.addheaders = [('User-Agent', USER_AGENT), ('Accept', '*/*')] + + if profile['TOP'] == 1: + hashtags = getTopHashTags(br) + else: + hashtags_filename = path.join(directory, 'hashtags.txt') + hashtags = getHashtagsFromFile(hashtags_filename) + like(br, hashtags) From 2879523c142800a52290f21ef85b10fdb418d8a3 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 9 Jun 2015 06:39:33 +0300 Subject: [PATCH 02/48] Mechanize and pycurl dependencies were replaced by Requests dependency. --- InstaBot.py | 89 ++++++++++++++++++++++------------------------------- 1 file changed, 36 insertions(+), 53 deletions(-) diff --git a/InstaBot.py b/InstaBot.py index af9d0e7..a821c9c 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,11 +1,11 @@ -import mechanize, yaml, re, time, sys, pycurl, hmac, urllib +import yaml, re, time, sys, hmac, requests, urllib from hashlib import sha256 from os import path -WEBSTA_URL = "http://websta.me/" -WEBSTA_HASHTAG = WEBSTA_URL + "hot" +WEBSTA_URL = 'http://websta.me/' +WEBSTA_HASHTAG = WEBSTA_URL + 'hot' -INSTAGRAM_API = "https://api.instagram.com/v1" +INSTAGRAM_API = 'https://api.instagram.com/v1' USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' def get_signature(endpoint, params, secret): @@ -16,36 +16,20 @@ def get_signature(endpoint, params, secret): def encodeAndRequest(id): ''' Function to encode the string with the IP and ID of the picture then like it. ''' - c = pycurl.Curl() - '''signature = hmac.new(str(profile['CREDENTIALS']['CLIENT_SECRET']), profile['IP'], sha256).hexdigest() - header = '|'.join([profile['IP'], signature]) - header = ["X-Insta-Forwarded-For: " + header]''' endpoint = '/media/' + id + '/likes' - post_data_dict = { 'access_token': profile['CREDENTIALS']['ACCESS_TOKEN'], } post_data_dict['sig'] = get_signature(endpoint, post_data_dict, profile['CREDENTIALS']['CLIENT_SECRET']) - print post_data_dict, endpoint - post_data = urllib.urlencode(post_data_dict) - url = INSTAGRAM_API + endpoint - c.setopt(c.URL, url) - c.setopt(c.POSTFIELDS, post_data) - #c.setopt(pycurl.HTTPHEADER, header) - c.perform() - - response = str(c.getinfo(c.HTTP_CODE)) - c.close() + return requests.post(url, data=post_data_dict) - return response +def getTopHashTags(): + ''' Function to parse the Top HashTag page and get the current top hashtags. ''' + response = requests.get(WEBSTA_HASHTAG) + topHashtags = re.findall('\"\>#(.*)\<\/a\>\<\/strong\>', response.text) + return topHashtags -# Function to parse the Top HashTag page and get the current top hashtags -def getTopHashTags(br): - br.open(WEBSTA_HASHTAG) - topHashtags = re.findall('\"\>#(.*)\<\/a\>\<\/strong\>', br.response().read()) - return topHashtags - # Function to read the hashtags from a users file if not wanting to parse the top 100 def getHashtagsFromFile(filename): #your list of hashtags @@ -56,51 +40,54 @@ def getHashtagsFromFile(filename): hashtags = [unicode(line.strip(), 'utf-8') for line in open(filename)] f.close() return hashtags - -# Function to like hashtages -def like(br, hashtags): + +def like(hashtags): + ''' Function to like hashtages. ''' likes = 0 for hashtag in hashtags: hashtaglikes = 0 media_id = [] - response = br.open(WEBSTA_URL +"tag/" + urllib.quote(hashtag.encode('utf-8'))) - print u"Liking #%s" % hashtag - media_id = re.findall("span class=\"like_count_(.*)\"", response.read()) - - for id in media_id: + hashtag_url = WEBSTA_URL +'tag/' + urllib.quote(hashtag.encode('utf-8')) + response = requests.get(hashtag_url) + print u'Liking #%s' % hashtag + media_ids = re.findall('span class=\"like_count_(.*)\"', response.text) - if profile['MAXLIKES'] == "NO_MAX": + for media_id in media_ids: + if profile['MAXLIKES'] == 'NO_MAX': pass elif likes >= int(profile['MAXLIKES']): - print "You have reached MAX_LIKES(" + str(profile['MAXLIKES']) + ")" - print u"This # is currently %s" % hashtag + print 'You have reached MAX_LIKES(%d)' % profile['MAXLIKES'] + print u'This # is currently %s' % hashtag sys.exit() break if profile['PERHASHTAG'] == "NO_MAX": pass elif hashtaglikes >= int(profile['PERHASHTAG']): - print "REACHED MAX_LIKES PER HASHTAG" - print "MOVING ONTO NEXT HASHTAG" + print 'REACHED MAX_LIKES PER HASHTAG' + print 'MOVING ONTO NEXT HASHTAG' hashtaglikes = 0 break - response = encodeAndRequest(id) - - if bool(re.search("200", response)): - print " YOU LIKED " + str(id) + response = encodeAndRequest(media_id) + if response.status_code == 200: + print ' YOU LIKED %s' % media_id likes += 1 hashtaglikes += 1 time.sleep(profile['SLEEPTIME']) + elif response.status_code == 429: + print ' TOO MANY REQUESTS' + print response.text + return else: - print "SOMETHING WENT WRONG" + print 'SOMETHING WENT WRONG' print response - print "SLEEPING FOR 60 seconds" - print "CURRENTLY LIKED " + str(likes) + " photos" + print 'SLEEPING FOR 60 seconds' + print 'CURRENTLY LIKED %d photos' % likes time.sleep(60) - print "YOU LIKED " + str(likes) + " photos" + print 'YOU LIKED %d photos' % likes if __name__ == "__main__": print "=================================" @@ -112,14 +99,10 @@ def like(br, hashtags): directory = path.abspath(path.dirname(__file__)) profile_filename = path.join(directory, 'profile.yml') profile = yaml.safe_load(open(profile_filename, "r")) - br = mechanize.Browser() - br.set_handle_robots(False) - br.set_handle_equiv(False) - br.addheaders = [('User-Agent', USER_AGENT), ('Accept', '*/*')] if profile['TOP'] == 1: - hashtags = getTopHashTags(br) + hashtags = getTopHashTags() else: hashtags_filename = path.join(directory, 'hashtags.txt') hashtags = getHashtagsFromFile(hashtags_filename) - like(br, hashtags) + like(hashtags) From cfdc85dd019d4d2e8fefa160ccc5f96050d39d7a Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 9 Jun 2015 06:57:19 +0300 Subject: [PATCH 03/48] Unicode console output troubles were fixed. --- InstaBot.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/InstaBot.py b/InstaBot.py index a821c9c..379e2a2 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,4 +1,5 @@ import yaml, re, time, sys, hmac, requests, urllib +import sys from hashlib import sha256 from os import path @@ -8,13 +9,17 @@ INSTAGRAM_API = 'https://api.instagram.com/v1' USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' +def unicode_print(s): + ''' Handles various troubles with unicode console output. ''' + print s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace') + def get_signature(endpoint, params, secret): sig = endpoint for key in sorted(params.keys()): sig += '|%s=%s' % (key, params[key]) return hmac.new(secret, sig, sha256).hexdigest() -def encodeAndRequest(id): +def like_media(id): ''' Function to encode the string with the IP and ID of the picture then like it. ''' endpoint = '/media/' + id + '/likes' post_data_dict = { @@ -24,14 +29,14 @@ def encodeAndRequest(id): url = INSTAGRAM_API + endpoint return requests.post(url, data=post_data_dict) -def getTopHashTags(): +def get_top_hashtags(): ''' Function to parse the Top HashTag page and get the current top hashtags. ''' response = requests.get(WEBSTA_HASHTAG) topHashtags = re.findall('\"\>#(.*)\<\/a\>\<\/strong\>', response.text) return topHashtags -# Function to read the hashtags from a users file if not wanting to parse the top 100 -def getHashtagsFromFile(filename): +def get_hashtags_from_file(filename): + ''' Function to read the hashtags from a users file if not wanting to parse the top 100. ''' #your list of hashtags hashtags = [] #Hashtag file input @@ -41,7 +46,7 @@ def getHashtagsFromFile(filename): f.close() return hashtags -def like(hashtags): +def like_hashtags(hashtags): ''' Function to like hashtages. ''' likes = 0 @@ -50,7 +55,7 @@ def like(hashtags): media_id = [] hashtag_url = WEBSTA_URL +'tag/' + urllib.quote(hashtag.encode('utf-8')) response = requests.get(hashtag_url) - print u'Liking #%s' % hashtag + unicode_print(u'Liking #%s' % hashtag) media_ids = re.findall('span class=\"like_count_(.*)\"', response.text) for media_id in media_ids: @@ -58,7 +63,7 @@ def like(hashtags): pass elif likes >= int(profile['MAXLIKES']): print 'You have reached MAX_LIKES(%d)' % profile['MAXLIKES'] - print u'This # is currently %s' % hashtag + unicode_print(u'This # is currently %s' % hashtag) sys.exit() break @@ -70,7 +75,7 @@ def like(hashtags): hashtaglikes = 0 break - response = encodeAndRequest(media_id) + response = like_media(media_id) if response.status_code == 200: print ' YOU LIKED %s' % media_id likes += 1 @@ -89,11 +94,11 @@ def like(hashtags): print 'YOU LIKED %d photos' % likes -if __name__ == "__main__": - print "=================================" - print " InstaBot " - print " Developed by Marc Laventure " - print "=================================" +if __name__ == '__main__': + print '=================================' + print ' InstaBot ' + print ' Developed by Marc Laventure ' + print '=================================' print directory = path.abspath(path.dirname(__file__)) @@ -101,8 +106,8 @@ def like(hashtags): profile = yaml.safe_load(open(profile_filename, "r")) if profile['TOP'] == 1: - hashtags = getTopHashTags() + hashtags = get_top_hashtags() else: hashtags_filename = path.join(directory, 'hashtags.txt') - hashtags = getHashtagsFromFile(hashtags_filename) - like(hashtags) + hashtags = get_hashtags_from_file(hashtags_filename) + like_hashtags(hashtags) From 48c4279e556d10d63525b80619953c6f8254b975 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 10 Jun 2015 17:05:15 +0300 Subject: [PATCH 04/48] Instagram private API was used to like media. --- .gitignore | 1 + InstaBot.py | 50 ++++++++++++++++------------------------ instagram.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 30 deletions(-) create mode 100644 .gitignore create mode 100644 instagram.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7e99e36 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc \ No newline at end of file diff --git a/InstaBot.py b/InstaBot.py index 379e2a2..935795a 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,4 +1,5 @@ import yaml, re, time, sys, hmac, requests, urllib +import instagram import sys from hashlib import sha256 from os import path @@ -13,22 +14,6 @@ def unicode_print(s): ''' Handles various troubles with unicode console output. ''' print s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace') -def get_signature(endpoint, params, secret): - sig = endpoint - for key in sorted(params.keys()): - sig += '|%s=%s' % (key, params[key]) - return hmac.new(secret, sig, sha256).hexdigest() - -def like_media(id): - ''' Function to encode the string with the IP and ID of the picture then like it. ''' - endpoint = '/media/' + id + '/likes' - post_data_dict = { - 'access_token': profile['CREDENTIALS']['ACCESS_TOKEN'], - } - post_data_dict['sig'] = get_signature(endpoint, post_data_dict, profile['CREDENTIALS']['CLIENT_SECRET']) - url = INSTAGRAM_API + endpoint - return requests.post(url, data=post_data_dict) - def get_top_hashtags(): ''' Function to parse the Top HashTag page and get the current top hashtags. ''' response = requests.get(WEBSTA_HASHTAG) @@ -46,7 +31,7 @@ def get_hashtags_from_file(filename): f.close() return hashtags -def like_hashtags(hashtags): +def like_hashtags(hashtags, client): ''' Function to like hashtages. ''' likes = 0 @@ -75,22 +60,24 @@ def like_hashtags(hashtags): hashtaglikes = 0 break - response = like_media(media_id) - if response.status_code == 200: - print ' YOU LIKED %s' % media_id - likes += 1 - hashtaglikes += 1 - time.sleep(profile['SLEEPTIME']) - elif response.status_code == 429: - print ' TOO MANY REQUESTS' - print response.text - return - else: + try: + client.like(media_id) + except instagram.APIError as e: + status_code = int(e.status_code) + if status_code in (403, 429): + print ' TOO MANY REQUESTS' + print e + return print 'SOMETHING WENT WRONG' - print response + print e print 'SLEEPING FOR 60 seconds' print 'CURRENTLY LIKED %d photos' % likes time.sleep(60) + else: + print ' YOU LIKED %s' % media_id + likes += 1 + hashtaglikes += 1 + time.sleep(profile['SLEEPTIME']) print 'YOU LIKED %d photos' % likes @@ -105,9 +92,12 @@ def like_hashtags(hashtags): profile_filename = path.join(directory, 'profile.yml') profile = yaml.safe_load(open(profile_filename, "r")) + client = instagram.Client() + client.login(profile['CREDENTIALS']['LOGIN'], profile['CREDENTIALS']['PASSWORD']) + if profile['TOP'] == 1: hashtags = get_top_hashtags() else: hashtags_filename = path.join(directory, 'hashtags.txt') hashtags = get_hashtags_from_file(hashtags_filename) - like_hashtags(hashtags) + like_hashtags(hashtags, client) diff --git a/instagram.py b/instagram.py new file mode 100644 index 0000000..55bdd66 --- /dev/null +++ b/instagram.py @@ -0,0 +1,65 @@ +import mechanize +import urllib + +BASE_URL = 'https://instagram.com/' + +class APIError(Exception): + def __init__(self, message, error): + print error + print dir(error) + print error.read() + self.message = message + ' ' + str(error) + self.status_code = error.getcode() + +class Client(object): + def __init__(self): + self._cookiejar = mechanize.CookieJar() + self._browser = mechanize.Browser() + self._browser.set_cookiejar(self._cookiejar) + + def _ajax(self, url, data=None, referer=BASE_URL): + if isinstance(data, dict): + data = urllib.urlencode(data) + headers = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Referer': referer, + 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:38.0) Gecko/20100101 Firefox/38.0', + 'X-CSRFToken': self._csrf_token, + 'X-Instagram-AJAX': '1', + 'X-Requested-With': 'XMLHttpRequest', + } + request = mechanize.Request( + BASE_URL + url, + data=data, + headers=headers, + ) + self._cookiejar.add_cookie_header(request) + try: + response = self._browser.open(request) + except Exception as e: + e = APIError('Error during making AJAX request.', e) + e.status_code = e.status_code + raise e + return response + + def _get_cookie(self, name): + for cookie in self._cookiejar: + if cookie.name == name: + return cookie.value + raise KeyError() + + def like(self, media_id): + self._ajax('web/likes/%s/like/' % media_id, data='') + + def login(self, login, password): + login_page_url = 'https://instagram.com/accounts/login/' + response = self._browser.open(login_page_url) + self._update_csrf_token() + login_response = self._ajax('accounts/login/ajax/', referer=login_page_url, data={ + 'username': login, + 'password': password, + }) + self._update_csrf_token() # CSRF token is refreshed after login request. + + def _update_csrf_token(self): + self._csrf_token = self._get_cookie('csrftoken') From 58014a0c799fa6df7a7d2263fa11b5b7ccc911ac Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 10 Jun 2015 18:57:04 +0300 Subject: [PATCH 05/48] Stateful like scheduler was implemented. --- .gitignore | 5 ++- InstaBot.py | 123 ++++++++++++++++----------------------------------- hashtags.txt | 3 -- instagram.py | 3 -- profile.yml | 8 ---- schedule.py | 63 ++++++++++++++++++++++++++ state.py | 46 +++++++++++++++++++ 7 files changed, 151 insertions(+), 100 deletions(-) delete mode 100644 hashtags.txt delete mode 100644 profile.yml create mode 100644 schedule.py create mode 100644 state.py diff --git a/.gitignore b/.gitignore index 7e99e36..ef2d4bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -*.pyc \ No newline at end of file +*.pyc +state.yml +configuration.yml +hashtags.txt diff --git a/InstaBot.py b/InstaBot.py index 935795a..c6ccdbf 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,12 +1,11 @@ -import yaml, re, time, sys, hmac, requests, urllib +import yaml, re, time, sys, hmac import instagram +import schedule +import state import sys -from hashlib import sha256 +from datetime import date from os import path -WEBSTA_URL = 'http://websta.me/' -WEBSTA_HASHTAG = WEBSTA_URL + 'hot' - INSTAGRAM_API = 'https://api.instagram.com/v1' USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' @@ -14,90 +13,44 @@ def unicode_print(s): ''' Handles various troubles with unicode console output. ''' print s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace') -def get_top_hashtags(): - ''' Function to parse the Top HashTag page and get the current top hashtags. ''' - response = requests.get(WEBSTA_HASHTAG) - topHashtags = re.findall('\"\>#(.*)\<\/a\>\<\/strong\>', response.text) - return topHashtags - -def get_hashtags_from_file(filename): - ''' Function to read the hashtags from a users file if not wanting to parse the top 100. ''' - #your list of hashtags - hashtags = [] - #Hashtag file input - f = open(filename) - #strips newline character - hashtags = [unicode(line.strip(), 'utf-8') for line in open(filename)] - f.close() - return hashtags - -def like_hashtags(hashtags, client): +def like_hashtags(schedule, client, state): ''' Function to like hashtages. ''' - likes = 0 - - for hashtag in hashtags: - hashtaglikes = 0 - media_id = [] - hashtag_url = WEBSTA_URL +'tag/' + urllib.quote(hashtag.encode('utf-8')) - response = requests.get(hashtag_url) - unicode_print(u'Liking #%s' % hashtag) - media_ids = re.findall('span class=\"like_count_(.*)\"', response.text) - - for media_id in media_ids: - if profile['MAXLIKES'] == 'NO_MAX': - pass - elif likes >= int(profile['MAXLIKES']): - print 'You have reached MAX_LIKES(%d)' % profile['MAXLIKES'] - unicode_print(u'This # is currently %s' % hashtag) - sys.exit() - break - - if profile['PERHASHTAG'] == "NO_MAX": - pass - elif hashtaglikes >= int(profile['PERHASHTAG']): - print 'REACHED MAX_LIKES PER HASHTAG' - print 'MOVING ONTO NEXT HASHTAG' - hashtaglikes = 0 - break - - try: - client.like(media_id) - except instagram.APIError as e: - status_code = int(e.status_code) - if status_code in (403, 429): - print ' TOO MANY REQUESTS' - print e - return - print 'SOMETHING WENT WRONG' + while True: + media_id = schedule.next() + try: + client.like(media_id) + except instagram.APIError as e: + status_code = int(e.status_code) + if status_code in (403, 429): + print ' TOO MANY REQUESTS' print e - print 'SLEEPING FOR 60 seconds' - print 'CURRENTLY LIKED %d photos' % likes - time.sleep(60) - else: - print ' YOU LIKED %s' % media_id - likes += 1 - hashtaglikes += 1 - time.sleep(profile['SLEEPTIME']) - - print 'YOU LIKED %d photos' % likes + return + print 'SOMETHING WENT WRONG' + print e + print 'SLEEPING FOR 60 seconds' + print 'CURRENTLY LIKED %d photos' % likes + time.sleep(60) + else: + print ' YOU LIKED %s' % media_id + state.increment(str(date.today())) + time.sleep(configuration['SLEEPTIME']) if __name__ == '__main__': - print '=================================' - print ' InstaBot ' - print ' Developed by Marc Laventure ' - print '=================================' - print - directory = path.abspath(path.dirname(__file__)) - profile_filename = path.join(directory, 'profile.yml') - profile = yaml.safe_load(open(profile_filename, "r")) + configuration_filename = path.join(directory, 'configuration.yml') + configuration = yaml.safe_load(open(configuration_filename, "r")) client = instagram.Client() - client.login(profile['CREDENTIALS']['LOGIN'], profile['CREDENTIALS']['PASSWORD']) - - if profile['TOP'] == 1: - hashtags = get_top_hashtags() - else: - hashtags_filename = path.join(directory, 'hashtags.txt') - hashtags = get_hashtags_from_file(hashtags_filename) - like_hashtags(hashtags, client) + client.login(configuration['CREDENTIALS']['LOGIN'], configuration['CREDENTIALS']['PASSWORD']) + + current_state = state.State(path.join(directory, 'state.yml')) + _schedule = schedule.Schedule( + hashtags_filename=path.join(directory, 'hashtags.txt'), + configuration=configuration, + state=current_state, + ) + like_hashtags( + schedule=_schedule, + client=client, + state=current_state, + ) diff --git a/hashtags.txt b/hashtags.txt deleted file mode 100644 index ec1bf27..0000000 --- a/hashtags.txt +++ /dev/null @@ -1,3 +0,0 @@ -I -Love -Python \ No newline at end of file diff --git a/instagram.py b/instagram.py index 55bdd66..dfbb941 100644 --- a/instagram.py +++ b/instagram.py @@ -5,9 +5,6 @@ class APIError(Exception): def __init__(self, message, error): - print error - print dir(error) - print error.read() self.message = message + ' ' + str(error) self.status_code = error.getcode() diff --git a/profile.yml b/profile.yml deleted file mode 100644 index dd08fce..0000000 --- a/profile.yml +++ /dev/null @@ -1,8 +0,0 @@ -CREDENTIALS: - ACCESS_TOKEN: USER_ACCESS_TOKEN - CLIENT_SECRET: "USER_CLIENT_SECRET" -SLEEPTIME: 1 -MAXLIKES: 10 -PERHASHTAG: 2 -TOP: 1 -IP: "USER_IP_ADDRESS" diff --git a/schedule.py b/schedule.py new file mode 100644 index 0000000..9be8e96 --- /dev/null +++ b/schedule.py @@ -0,0 +1,63 @@ +import re +import requests +import urllib + +WEBSTA_URL = 'http://websta.me/' + +class ScheduleError(Exception): + pass + +class Schedule(object): + def __init__(self, hashtags_filename, configuration, state): + self._configuration = configuration + self._hashtags = self._get_hashtags_from_file(hashtags_filename) + self._state = state + self._update_media_ids(self._get_current_hashtag()) + + def next(self): + media_id_index = self._state.increment('schedule_media_id_index', 0) + if media_id_index >= len(self._media_ids) or media_id_index >= self._configuration['PERHASHTAG']: + media_id_index = 0 + self._state['schedule_media_id_index'] = media_id_index + self._update_media_ids(self._get_next_hashtag()) + return self._media_ids[media_id_index] + + def _get_hashtags_from_file(self, filename): + ''' Function to read the hashtags from a users file if not wanting to parse the top 100. ''' + hashtags = [] + f = open(filename, 'rb') + # Strip newline character. + hashtags = [unicode(line, 'utf-8').strip() for line in open(filename)] + f.close() + return hashtags + + def _get_current_hashtag(self): + hashtag_index = self._state.get('schedule_hashtag_index', 0) + try: + hashtag = self._hashtags[hashtag_index] + except IndexError: + hashtag_index = 0 + try: + hashtag = self._hashtags[hashtag_index] + except IndexError: + raise ScheduleError('Hashtags array is empty.') + self._state['schedule_hashtag_index'] = hashtag_index + return hashtag + + def _get_next_hashtag(self): + hashtag_index = self._state.increment('schedule_hashtag_index', 0) + try: + hashtag = self._hashtags[hashtag_index] + except IndexError: + hashtag_index = 0 + try: + hashtag = self._hashtags[hashtag_index] + except IndexError: + raise ScheduleError('Hashtags array is empty.') + self._state['schedule_hashtag_index'] = hashtag_index + return hashtag + + def _update_media_ids(self, hashtag): + hashtag_url = WEBSTA_URL +'tag/' + urllib.quote(hashtag.encode('utf-8')) + response = requests.get(hashtag_url) + self._media_ids = re.findall('span class=\"like_count_(.*)\"', response.text) diff --git a/state.py b/state.py new file mode 100644 index 0000000..9f819e8 --- /dev/null +++ b/state.py @@ -0,0 +1,46 @@ +import yaml + +class State: + def __init__(self, path): + self._path = path + try: + f = open(path, 'rb') + except IOError: + self._data = {} + return + self._data = yaml.load(f) + f.close() + + def __delitem__(self, key, value): + del self._data[key] + self._save() + + def __getitem__(self, key): + return self._data[key] + + def __setitem__(self, key, value): + self._data[key] = value + self._save() + + def _save(self): + f = open(self._path, 'wb') + yaml.dump(self._data, f) + f.close() + + def increment(self, key, default_value=1): + try: + value = self._data[key] + except KeyError: + value = default_value + else: + value += 1 + self[key] = value + return value + + def get(self, key, default_value=None): + try: + value = self._data[key] + except KeyError: + value = default_value + self[key] = value + return value From 7c2fac40648da49a29f2c9e28b826f18b3f283b3 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 10 Jun 2015 19:38:53 +0300 Subject: [PATCH 06/48] README.md was updated. Requirements were put at requirements.txt --- PycURL Download.md | 9 ------- README.md | 61 ++++++++++++++++------------------------------ requirements.txt | 3 +++ 3 files changed, 24 insertions(+), 49 deletions(-) delete mode 100644 PycURL Download.md create mode 100644 requirements.txt diff --git a/PycURL Download.md b/PycURL Download.md deleted file mode 100644 index e210e72..0000000 --- a/PycURL Download.md +++ /dev/null @@ -1,9 +0,0 @@ -[Download PycURL](http://pycurl.sourceforge.net/) - -Run - -``` -tar -xvf pycurl-7.19.5.tar.gz -cd pycurl-7.19.5/ -sudo python setup.py install -``` diff --git a/README.md b/README.md index b1c0831..4170282 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,42 @@ -InstaBot -======== +# Instagram Bot -NOTE BIG UPDATE ON FUCTIONALITY; PLEASE UPDATE YOUR WORKING COPY AND FOLLOW NEW INSTRUCTIONS! - -A simple Instagram bot that pulls trending top 100 hashtags and auto likes pictures with those hashtags to get more followers. +A simple Instagram bot that cycles through hashtags listed at a file and automatically likes pictures with those hashtags to get more followers. Developed in Python and built with the mechanize library -STILL IN DEVELOPMENT, CONTRIBUTIONS ARE WELCOME +## Setup -##Requirements +At first, get the source. Clone this repository: -1. Python is installed (Tested with version 2.6.8) -2. mechanize library is installed [Mechanize download!](http://wwwsearch.sourceforge.net/mechanize/download.html) V0.2.5 -3. PyYAML libray is installed [PyYAML download!](pyyaml.org/wiki/PyYAML) V3.11 -4. Authenticated your instagram account on [websta.me](http://websta.me/) -5. PycURL library installed [PycURL download!](http://pycurl.sourceforge.net/) V7.19.5 -6. Registered a client for your account on [instagram](http://instagram.com/developer/clients/manage/) + $ git clone https://github.com/marclave/InstaBot.git -##Setup -Clone this repository: -``` -git clone https://github.com/marclave/InstaBot.git -``` -Follow install instructions for PycURL: [instructions](PycURL Download.md) +### Requirements -Go to [instagram clients](http://instagram.com/developer/clients/manage/) -Register your account for a developers client -Retrieve your CLIENT SECRET and USER ID token under "Manage Clients" -To retrieve your access token, go to [instagram api console](http://instagram.com/developer/api-console/) -Run a query involving your USER ID and grab your access token from the request +You can install all needed requirements with single command: -Note: Ensure likes are part of the access scope [enable likes scope](https://instagram.com/oauth/authorize/?client_id=INSERT_CLIENTID&redirect_uri=INSERT_REDIRECTURI&response_type=code&scope=likes+basic) + $ pip install -r requirements.txt +### Configuration -Modify the profile to include your information, example: -``` +Create `configuration.yml` file containing your information, e.g.: + +```yaml CREDENTIALS: - ACCESS_TOKEN: "USER_ACCESS_TOKEN" - CLIENT_SECRET: "USER_CLIENT_SECRET" -MAXLIKES: 1000 <- If you dont want a max, input NO_MAX -PERHASHTAG: 10 <- If you dont want a max, input NO_MAX -TOP: 1 <- To use the top hashtags on Websta.me use a 1 -IP: "USER_IP_ADDRESS" <- run ipconfig or ifconfig to grab your ip address + LOGIN: "your_login" + PASSWORD: "topsecret" +SLEEPTIME: 5 +PERHASHTAG: 10 ``` -Note: If you do not put a 1 in the value of TOP then the program will look for a text file -called hashtags.txt. -The format for this file is to have each hashtag seperated by line, example: +The format for `hashtags.txt` file is to have each hashtag seperated by line, e.g.: ``` I Love Python ``` +## Launching -Then run: -``` -python InstaBot.py -``` +Run: + + $ python InstaBot.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d4d2156 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +mechanize >= 0.2.5 +PyYAML >= 3.11 +requests >= 2.7.0 From 3a431e1763012f503c8c5d2a594233e456d26fe4 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Thu, 11 Jun 2015 00:42:40 +0300 Subject: [PATCH 07/48] Repo link was fixed at README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4170282..08ac351 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Developed in Python and built with the mechanize library At first, get the source. Clone this repository: - $ git clone https://github.com/marclave/InstaBot.git + $ git clone https://github.com/quasiyoke/InstaBot.git ### Requirements From b06c92790f6bb61130698fbb96acb88f1d685c35 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Fri, 15 Jan 2016 17:42:37 +0300 Subject: [PATCH 08/48] Login was fixed. Logging was slightly improved. --- .gitignore | 1 + InstaBot.py | 18 ++++++++++-------- instagram.py | 8 +++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index ef2d4bd..793ccdd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ state.yml configuration.yml hashtags.txt +*.log diff --git a/InstaBot.py b/InstaBot.py index c6ccdbf..e69385d 100644 --- a/InstaBot.py +++ b/InstaBot.py @@ -1,5 +1,6 @@ import yaml, re, time, sys, hmac import instagram +import logging import schedule import state import sys @@ -11,10 +12,11 @@ def unicode_print(s): ''' Handles various troubles with unicode console output. ''' - print s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace') + logging.debug(s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace')) def like_hashtags(schedule, client, state): ''' Function to like hashtages. ''' + logging.info('Started to like by hashtags') while True: media_id = schedule.next() try: @@ -22,16 +24,15 @@ def like_hashtags(schedule, client, state): except instagram.APIError as e: status_code = int(e.status_code) if status_code in (403, 429): - print ' TOO MANY REQUESTS' - print e + logging.debug(' TOO MANY REQUESTS') + logging.debug(e) return - print 'SOMETHING WENT WRONG' - print e - print 'SLEEPING FOR 60 seconds' - print 'CURRENTLY LIKED %d photos' % likes + logging.debug('SOMETHING WENT WRONG') + logging.debug(e) + logging.debug('SLEEPING FOR 60 seconds') time.sleep(60) else: - print ' YOU LIKED %s' % media_id + logging.debug(' YOU LIKED %s' % media_id) state.increment(str(date.today())) time.sleep(configuration['SLEEPTIME']) @@ -39,6 +40,7 @@ def like_hashtags(schedule, client, state): directory = path.abspath(path.dirname(__file__)) configuration_filename = path.join(directory, 'configuration.yml') configuration = yaml.safe_load(open(configuration_filename, "r")) + logging.basicConfig(filename=path.join(directory, 'log.log'), level=logging.DEBUG) client = instagram.Client() client.login(configuration['CREDENTIALS']['LOGIN'], configuration['CREDENTIALS']['PASSWORD']) diff --git a/instagram.py b/instagram.py index dfbb941..4526254 100644 --- a/instagram.py +++ b/instagram.py @@ -1,7 +1,8 @@ +import logging import mechanize import urllib -BASE_URL = 'https://instagram.com/' +BASE_URL = 'https://www.instagram.com/' class APIError(Exception): def __init__(self, message, error): @@ -20,7 +21,7 @@ def _ajax(self, url, data=None, referer=BASE_URL): headers = { 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Referer': referer, - 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:38.0) Gecko/20100101 Firefox/38.0', + 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:43.0) Gecko/20100101 Firefox/43.0', 'X-CSRFToken': self._csrf_token, 'X-Instagram-AJAX': '1', 'X-Requested-With': 'XMLHttpRequest', @@ -49,7 +50,7 @@ def like(self, media_id): self._ajax('web/likes/%s/like/' % media_id, data='') def login(self, login, password): - login_page_url = 'https://instagram.com/accounts/login/' + login_page_url = BASE_URL response = self._browser.open(login_page_url) self._update_csrf_token() login_response = self._ajax('accounts/login/ajax/', referer=login_page_url, data={ @@ -60,3 +61,4 @@ def login(self, login, password): def _update_csrf_token(self): self._csrf_token = self._get_cookie('csrftoken') + logging.debug('csrftoken is %s', self._csrf_token) From 84761b07a6357b7fe0ec2de6dd716c9350516c42 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Thu, 18 Feb 2016 19:19:48 +0300 Subject: [PATCH 09/48] Complete refactoring. --- InstaBot.py | 58 ------------------ README.md | 49 ++++++++++----- instabot/__main__.py | 5 ++ instabot/configuration.py | 32 ++++++++++ instabot/db.py | 34 +++++++++++ instabot/errors.py | 10 ++++ instabot/following_service.py | 39 ++++++++++++ instabot/instabot.py | 88 +++++++++++++++++++++++++++ instabot/instagram.py | 100 +++++++++++++++++++++++++++++++ instabot/like_service.py | 32 ++++++++++ instabot/media_service.py | 38 ++++++++++++ instabot/stats_service.py | 42 +++++++++++++ instabot/user.py | 19 ++++++ instabot/user_service.py | 53 +++++++++++++++++ instabot/views.py | 108 ++++++++++++++++++++++++++++++++++ instabot_runner.py | 6 ++ instagram.py | 64 -------------------- requirements.txt | 7 ++- schedule.py | 63 -------------------- state.py | 46 --------------- 20 files changed, 645 insertions(+), 248 deletions(-) delete mode 100644 InstaBot.py create mode 100644 instabot/__main__.py create mode 100644 instabot/configuration.py create mode 100644 instabot/db.py create mode 100644 instabot/errors.py create mode 100644 instabot/following_service.py create mode 100644 instabot/instabot.py create mode 100644 instabot/instagram.py create mode 100644 instabot/like_service.py create mode 100644 instabot/media_service.py create mode 100644 instabot/stats_service.py create mode 100644 instabot/user.py create mode 100644 instabot/user_service.py create mode 100644 instabot/views.py create mode 100755 instabot_runner.py delete mode 100644 instagram.py delete mode 100644 schedule.py delete mode 100644 state.py diff --git a/InstaBot.py b/InstaBot.py deleted file mode 100644 index e69385d..0000000 --- a/InstaBot.py +++ /dev/null @@ -1,58 +0,0 @@ -import yaml, re, time, sys, hmac -import instagram -import logging -import schedule -import state -import sys -from datetime import date -from os import path - -INSTAGRAM_API = 'https://api.instagram.com/v1' -USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' - -def unicode_print(s): - ''' Handles various troubles with unicode console output. ''' - logging.debug(s.encode(sys.stdout.encoding or 'ascii', 'backslashreplace')) - -def like_hashtags(schedule, client, state): - ''' Function to like hashtages. ''' - logging.info('Started to like by hashtags') - while True: - media_id = schedule.next() - try: - client.like(media_id) - except instagram.APIError as e: - status_code = int(e.status_code) - if status_code in (403, 429): - logging.debug(' TOO MANY REQUESTS') - logging.debug(e) - return - logging.debug('SOMETHING WENT WRONG') - logging.debug(e) - logging.debug('SLEEPING FOR 60 seconds') - time.sleep(60) - else: - logging.debug(' YOU LIKED %s' % media_id) - state.increment(str(date.today())) - time.sleep(configuration['SLEEPTIME']) - -if __name__ == '__main__': - directory = path.abspath(path.dirname(__file__)) - configuration_filename = path.join(directory, 'configuration.yml') - configuration = yaml.safe_load(open(configuration_filename, "r")) - logging.basicConfig(filename=path.join(directory, 'log.log'), level=logging.DEBUG) - - client = instagram.Client() - client.login(configuration['CREDENTIALS']['LOGIN'], configuration['CREDENTIALS']['PASSWORD']) - - current_state = state.State(path.join(directory, 'state.yml')) - _schedule = schedule.Schedule( - hashtags_filename=path.join(directory, 'hashtags.txt'), - configuration=configuration, - state=current_state, - ) - like_hashtags( - schedule=_schedule, - client=client, - state=current_state, - ) diff --git a/README.md b/README.md index 08ac351..ede794f 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ A simple Instagram bot that cycles through hashtags listed at a file and automatically likes pictures with those hashtags to get more followers. -Developed in Python and built with the mechanize library - ## Setup At first, get the source. Clone this repository: @@ -21,22 +19,45 @@ You can install all needed requirements with single command: Create `configuration.yml` file containing your information, e.g.: ```yaml -CREDENTIALS: - LOGIN: "your_login" - PASSWORD: "topsecret" -SLEEPTIME: 5 -PERHASHTAG: 10 +credentials: + client_id: "1eac8774163c2fc938db3a0ee82a6873" + login: "your_login" + password: "eKeFB2;AW6fS}z" +db: + host: "localhost" + name: "instagram" + user: "instabot" + password: "GT8H!b]5,9}A7" +following_hours: 120 +logging: + version: 1 + formatters: + simple: + class: logging.Formatter + format: "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + handlers: + console: + class: logging.StreamHandler + level: DEBUG + formatter: simple + root: + level: DEBUG + handlers: + - console +hashtags: + - I + - love + - Python ``` -The format for `hashtags.txt` file is to have each hashtag seperated by line, e.g.: +Execute this at MySQL console: + + CREATE DATABASE IF NOT EXISTS instagram CHARACTER SET utf8 COLLATE utf8_general_ci; + CREATE USER instabot@localhost IDENTIFIED BY 'GT8H!b]5,9}A7'; + GRANT ALL ON instagram.* TO instabot@localhost; -``` -I -Love -Python -``` ## Launching Run: - $ python InstaBot.py + $ ./instabot_runner.py diff --git a/instabot/__main__.py b/instabot/__main__.py new file mode 100644 index 0000000..ac05b39 --- /dev/null +++ b/instabot/__main__.py @@ -0,0 +1,5 @@ +'''instabot.__main__: executed when instabot directory is called as script.''' + +from .instabot import main + +main() diff --git a/instabot/configuration.py b/instabot/configuration.py new file mode 100644 index 0000000..5819117 --- /dev/null +++ b/instabot/configuration.py @@ -0,0 +1,32 @@ +import logging +import sys +import yaml + +LOGGER = logging.getLogger('instabot') + +class Configuration: + def __init__(self, filename): + try: + with open(filename, 'r') as f: + configuration = yaml.safe_load(f) + except (IOError, OSError, ValueError) as e: + sys.exit('Can\'t obtain configuration: %s' % e) + try: + self.db_host = configuration['db']['host'] + self.db_name = configuration['db']['name'] + self.db_user = configuration['db']['user'] + self.db_password = configuration['db']['password'] + self.following_hours = configuration['following_hours'] + self.instagram_client_id = configuration['credentials']['client_id'] + self.instagram_login = configuration['credentials']['login'] + self.instagram_password = configuration['credentials']['password'] + self.logging = configuration['logging'] + self.hashtags = configuration['hashtags'] + except KeyError as e: + sys.exit('Configuration is not fully specified: %s' % e) + try: + self.following_hours = int(self.following_hours) + except ValueError as e: + sys.exit('following_hours are specified wrong: %s' % e) + if len(self.hashtags) == 0: + sys.exit('Specify at least one hashtag, please') diff --git a/instabot/db.py b/instabot/db.py new file mode 100644 index 0000000..705ddf2 --- /dev/null +++ b/instabot/db.py @@ -0,0 +1,34 @@ +import logging +import sys +from .errors import DBError +from instabot import user +from peewee import * +from playhouse.shortcuts import RetryOperationalError + +LOGGER = logging.getLogger('instabot') + +class RetryingMySQLDatabase(RetryOperationalError, MySQLDatabase): + ''' + Automatically reconnecting database class. + @see http://docs.peewee-orm.com/en/latest/peewee/database.html#automatic-reconnect + ''' + pass + +def get_db(configuration): + ''' + @raise DBError + ''' + db = RetryingMySQLDatabase( + configuration.db_name, + host=configuration.db_host, + user=configuration.db_user, + password=configuration.db_password, + ) + # Connect to database just to check if configuration has errors. + try: + db.connect() + except DatabaseError as e: + sys.exit('DatabaseError during connecting to database: {0}'.format(e)) + db.close() + user.database_proxy.initialize(db) + return db diff --git a/instabot/errors.py b/instabot/errors.py new file mode 100644 index 0000000..3453e9b --- /dev/null +++ b/instabot/errors.py @@ -0,0 +1,10 @@ +class APIError(Exception): + def __init__(self, message, error): + self.message = message + ' ' + str(error) + self.status_code = error.getcode() + +class APILimitError(Exception): + pass + +class DBError(Exception): + pass diff --git a/instabot/following_service.py b/instabot/following_service.py new file mode 100644 index 0000000..f1ccf20 --- /dev/null +++ b/instabot/following_service.py @@ -0,0 +1,39 @@ +import asyncio +import datetime +import logging +from .errors import APILimitError +from .user import User + +LOGGER = logging.getLogger('instabot') + +class FollowingService: + LONG_AGO_TIMEDELTA = datetime.timedelta(days=5) + + def __init__(self, client): + self._client = client + + @asyncio.coroutine + def run(self): + while True: + try: + yield from self._unfollow() + yield from self._follow() + except APILimitError as e: + LOGGER.debug('Instagram limit was reached during following: %s', e) + yield from asyncio.sleep(60) + else: + yield from asyncio.sleep(10) + + @asyncio.coroutine + def _follow(self): + for user in User.select().where(User.was_followed_at == None).order_by( + User.friending_depth.desc(), + User.friends_fetched.desc(), + ): + self._client.follow(user) + + @asyncio.coroutine + def _unfollow(self): + long_ago = datetime.datetime.utcnow() - LONG_AGO_TIMEDELTA + for user in User.select().where((User.is_followed == True) & (User.was_followed_at < long_ago)): + self._client.unfollow(user) diff --git a/instabot/instabot.py b/instabot/instabot.py new file mode 100644 index 0000000..721b5f5 --- /dev/null +++ b/instabot/instabot.py @@ -0,0 +1,88 @@ +import datetime +import logging +import logging.config +import sys +from .configuration import Configuration +from .db import get_db +from .following_service import FollowingService +from .like_service import LikeService +from .media_service import MediaService +from .stats_service import StatsService +from .user import User +from .user_service import UserService +from docopt import docopt +from instabot import instagram +from os import path + +DIR = path.abspath(path.dirname(__file__)) +DOC = '''InstaBot + +Usage: + instabot CONFIGURATION + instabot install CONFIGURATION + instabot -h | --help | --version + +Arguments: + CONFIGURATION Path to configuration.yml file. +''' +INSTAGRAM_API = 'https://api.instagram.com/v1' +LOGGER = logging.getLogger('instabot') +USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) ' \ + 'Chrome/23.0.1271.64 Safari/537.11' +__version__ = '0.2' + +def install(client, configuration, db): + db.create_tables([User]) + now = datetime.datetime.utcnow() + was_followed_at = now - datetime.timedelta(hours=configuration.following_hours) + User.create( + following_depth=0, + instagram_id=instagram_id, + was_followed_at=was_followed_at, # To prevent attempts to follow user by himself. + were_followers_fetched=True, + ) + for follower_id in asyncio.run_until_complete(client.get_followers()): + User.create( + following_depth=1, + instagram_id=follower_id, + is_followed=True, + was_followed_at=was_followed_at, + ) + +def main(): + arguments = docopt(DOC, version=__version__) + logging.basicConfig(level=logging.DEBUG) + + configuration = Configuration(arguments['CONFIGURATION']) + + logging.config.dictConfig(configuration.logging) + + db = get_db(configuration) + client = instagram.Client(configuration) + + if arguments['install']: + LOGGER.info('Installing InstaBot') + install(client, configuration, db) + else: + LOGGER.info('Executing InstaBot') + run(clientconfiguration) + +def run(configuration): + loop = asyncio.get_event_loop() + + stats_service = StatsService() + loop.create_task(stats_service.run()) + + user_service = UserService(client) + loop.create_task(user_service.run()) + + following_service = FollowingService(client) + loop.create_task(following_service.run()) + + media_service = MediaService(configuration) + loop.create_task(media_service.run()) + + like_service = LikeService(client, media_service) + loop.create_task(like_service.run()) + + loop.run_forever() diff --git a/instabot/instagram.py b/instabot/instagram.py new file mode 100644 index 0000000..088ba5a --- /dev/null +++ b/instabot/instagram.py @@ -0,0 +1,100 @@ +import aiohttp +import asyncio +import logging +from .errors import APIError, APILimitError + +API_URL = 'https://api.instagram.com/v1/' +BASE_URL = 'https://www.instagram.com/' +LOGGER = logging.getLogger('instabot') + +class Client(object): + def __init__(self, configuration): + self._client_id = configuration.instagram_client_id + #self._cookiejar = mechanize.CookieJar() + #self._browser = mechanize.Browser() + #self._browser.set_cookiejar(self._cookiejar) + self._login = configuration.instagram_login + self._password = configuration.instagram_password + self._session = aiohttp.ClientSession() + loop = asyncio.get_event_loop() + loop.run_until_complete(self._do_login()) + + @asyncio.coroutine + def _ajax(self, url, params=None, referer=None): + headers = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:43.0) Gecko/20100101 Firefox/43.0', + 'X-CSRFToken': self._csrf_token, + 'X-Instagram-AJAX': '1', + 'X-Requested-With': 'XMLHttpRequest', + } + if referer is not None: + headers['Referer'] = referer + try: + response = yield from self._session.get( + BASE_URL + url, + params=params, + headers=headers, + ) + response = yield from response.json() + except Exception as e: + raise self._build_error(e) + return response + + def _build_error(self, e): + if e.getcode() in (403, 429): + return APILimitError() + else: + return APIError(e) + + @asyncio.coroutine + def get_followers(self, user): + response = yield from self._get_followers(user=user) + for follower in self._parse_followers(response): + yield follower + next_url = response['pagination'].get('next_url') + while next_url: + yield from asyncio.sleep(.7) + response = yield from self._get_followers(url=next_url) + for follower in self._parse_followers(response): + yield follower + next_url = response['pagination'].get('next_url') + + @asyncio.coroutine + def get_some_followers(self, user): + response = yield from self._get_followers(user) + return self._parse_followers(response) + + @asyncio.coroutine + def _get_followers(self, user=None, url=None): + if url is None: + url = '{0}users/{1}/followed-by?client_id={2}'.format(API_URL, user, self._client_id) + response = yield from aiohttp.get(url) + response = yield from response.json() + return response + + @asyncio.coroutine + def _open(self, url): + response = yield from self._session.get(url) + yield from response.text() + + def _parse_followers(self, response): + return [follower['id'] for follower in response['params']] + + @asyncio.coroutine + def like(self, media): + yield from self._ajax('web/likes/%s/like/' % media, params='') + + @asyncio.coroutine + def _do_login(self): + login_page_url = BASE_URL + response = yield from self._open(login_page_url) + self._update_csrf_token() + login_response = yield from self._ajax('accounts/login/ajax/', referer=login_page_url, params={ + 'username': self._login, + 'password': self._password, + }) + + def _update_csrf_token(self): + self._csrf_token = self._session.cookies['csrftoken'].value + LOGGER.debug('API. CSRF token is %s', self._csrf_token) diff --git a/instabot/like_service.py b/instabot/like_service.py new file mode 100644 index 0000000..e710d4f --- /dev/null +++ b/instabot/like_service.py @@ -0,0 +1,32 @@ +import asyncio +import logging +from .stats_service import StatsService + +LOGGER = logging.getLogger('instabot') + +class LikeService: + def __init__(self, client, media_service): + self._client = client + self._media_service = media_service + self._stats_service = StatsService.get_instance() + + @asyncio.coroutine + def run(self): + while True: + media = yield from self._media_service.pop() + while True: + try: + yield from self._client.like(media) + except instagram.APIError as e: + status_code = int(e.status_code) + if status_code in (403, 429): + LOGGER.debug('Instagram limits reached during liking: %s', e) + yield from asyncio.sleep(60) + else: + LOGGER.debug('Something went wrong during liking: %s', e) + yield from asyncio.sleep(5) + else: + LOGGER.debug('Liked %s', media) + self._stats_service.increment('liked') + yield from asyncio.sleep(.7) + break diff --git a/instabot/media_service.py b/instabot/media_service.py new file mode 100644 index 0000000..eb58352 --- /dev/null +++ b/instabot/media_service.py @@ -0,0 +1,38 @@ +import aiohttp +import asyncio +import itertools +import re +import urllib + +MEDIA_LENGTH_MIN = 100 +WEBSTA_URL = 'http://websta.me/' + +class ScheduleError(Exception): + pass + +class MediaService(object): + def __init__(self, configuration): + self._hashtags = configuration.hashtags + self._media = [] + + @asyncio.coroutine + def _get_media_by_hashtag(self, hashtag): + hashtag_url = '{0}tag/{1}'.format(WEBSTA_URL, urllib.quote(hashtag.encode('utf-8'))) + response = yield from aiohttp.get(hashtag_url) + return re.findall('span class=\"like_count_([^\"]+)\"', response.text) + + @asyncio.coroutine + def run(self): + for hashtag in itertools.cycle(self._hashtags): + while len(self._media) < MEDIA_LENGTH_MIN: + self._media.extend((yield from self._get_media_by_hashtag(hashtag))) + while len(self._media) >= MEDIA_LENGTH_MIN: + yield from asyncio.sleep(5) + + @asyncio.coroutine + def pop(self): + while True: + try: + return self._media.pop(0) + except IndexError: + yield from asyncio.sleep(1) diff --git a/instabot/stats_service.py b/instabot/stats_service.py new file mode 100644 index 0000000..158f744 --- /dev/null +++ b/instabot/stats_service.py @@ -0,0 +1,42 @@ +import asyncio + +class Counter: + def __init__(self): + self._counter = {} + + def clear(self): + self._counter.clear() + + def increment(self, key): + value = self._counter.get(key, 0) + self._counter[key] = value + 1 + + def report(self, prefix): + LOGGER.info('%s %s', prefix, str(self._counter)) + +class StatsService: + def __init__(self): + self._hourly_counter = Counter() + self._daily_counter = Counter() + type(self)._instance = self + + @classmethod + def get_instance(cls): + return cls._instance + + @asyncio.coroutine + def run(self): + hour = 0 + while True: + asyncio.sleep(60 * 60) + hour += 1 + if hour % 24 == 0: + self._daily_counter.report('Daily stats #{0}'.format(hour / 24)) + self._daily_counter.clear() + else: + self._hourly_counter.report('Hourly stats #{0}'.format(hour)) + self._hourly_counter.clear() + + def increment(self, key): + self._hourly_counter.increment(key) + self._daily_counter.increment(key) diff --git a/instabot/user.py b/instabot/user.py new file mode 100644 index 0000000..42761cf --- /dev/null +++ b/instabot/user.py @@ -0,0 +1,19 @@ +import datetime +from peewee import * + +database_proxy = Proxy() + +class User(Model): + instagram_id = CharField(max_length=20, unique=True) + is_followed = BooleanField(default=False) + was_followed_at = DateTimeField(null=True) + were_followers_fetched = BooleanField(default=False) + following_depth = IntegerField() + created = DateTimeField(default=datetime.datetime.utcnow) + + class Meta: + db = database_proxy + indexes = ( + (('is_followed', 'was_followed_at'), False), + (('were_followers_fetched', 'following_depth', 'created'), False) + ) diff --git a/instabot/user_service.py b/instabot/user_service.py new file mode 100644 index 0000000..243370c --- /dev/null +++ b/instabot/user_service.py @@ -0,0 +1,53 @@ +import asyncio +import logging +from .stats_service import StatsService +from .user import User + +LOGGER = logging.getLogger('instabot') +USERS_LIMIT = 1000 + +class UserService: + def __init__(self, client): + self._client = client + self._stats_service = StatsService.get_instance() + + @asyncio.coroutine + def run(self): + while True: + try: + yield from self._ensure_enough_users() + except InstagramLimitError as e: + LOGGER.debug('Fetching users. Instagram limits were reached: %s', e) + yield from asyncio.sleep(60) + except Exception as e: + LOGGER.debug('Fetching users. Some troubles: %s', e) + yield from asyncio.sleep(5) + else: + yield from asyncio.sleep(5) + + @asyncio.coroutine + def _ensure_enough_users(self): + users_to_follow_count = len(User.select().where(User.subscribed_at == None)) + if users_to_follow_count < USERS_LIMIT: + last_users_to_follow_count = users_to_follow_count + for user in User.select().where(User.were_followers_fetched == False).order_by( + User.following_depth, + User.created, + ): + following_depth = user.following_depth + 1 + for follower_id in (yield from client.get_some_followers(user.instagram_id)): + follower, created = User.get_or_create(instagram_id=follower_id) + if created: + follower.following_depth = following_depth + follower.save() + users_to_follow_count += 1 + STATS_SERVICE.increment('users_to_follow_fetched') + elif follower.following_depth > following_depth: + follower.following_depth = following_depth + follower.save() + if users_to_follow_count >= USERS_LIMIT: + break + LOGGER.debug( + 'Fetching users. %d users fetched.', + users_to_follow_count - last_users_to_follow_count, + ) diff --git a/instabot/views.py b/instabot/views.py new file mode 100644 index 0000000..85cee33 --- /dev/null +++ b/instabot/views.py @@ -0,0 +1,108 @@ +import json +from django import http +from cms import sitemaps +from django.core import mail +from django.conf import settings +from django.views.generic import edit as edit_views +from cmsplugin_dog import models as dog_models +import models +import forms + + +def _update_user_subscriptions(user, subscribe): + import mailchimp + from mailchimp import chimp + user.subscribed = subscribe + mailchimp_list = mailchimp.utils.get_connection().get_list_by_id(settings.MAILCHIMP_LIST_ID) + if subscribe: + try: + mailchimp_list.subscribe(user.auth_user.email, { 'EMAIL': user.auth_user.email, 'FNAME': user.auth_user.first_name, }) + except chimp.ChimpyException: + pass + else: + try: + mailchimp_list.unsubscribe(user.auth_user.email) + except chimp.ChimpyException: + pass + + +class Claim(edit_views.FormView): + form_class = forms.ClaimForm + template_name = 'claim.html' + + def get_context_data(self, *args, **kwargs): + context = super(Claim, self).get_context_data(*args, **kwargs) + context['dog'] = getattr(self, 'dog', None) + return context + + def get_initial(self): + initial = super(Claim, self).get_initial() + initial.update(forms.ClaimForm.get_initial(self.request.user)) + return initial + + def get_success_url(self): + return self.dog.get_url() + + def post(self, request, *args, **kwargs): + form_class = self.get_form_class() + form = self.get_form(form_class) + self.dog = self.get_dog(form) + if form.is_valid(): + return self.form_valid(form) + else: + return self.form_invalid(form) + + def get_dog(self, form): + try: + return dog_models.Dog.objects.get(pk=form.data['dog']) + except dog_models.Dog.DoesNotExist: + raise http.Http404() + + def form_valid(self, form): + auth_user = self.request.user + if auth_user.is_authenticated() and auth_user.email == form.cleaned_data['email'].lower(): + try: + user = auth_user.kapustkinpitomnik_user + except models.User.DoesNotExist: + user = None + else: + auth_user = auth.authenticate(form.cleaned_data['email'], settings.ANY_PASSWORD) + if auth_user: + try: + user = auth_user.kapustkinpitomnik_user + except models.User.DoesNotExist: + user = None + else: + auth_user = auth.create_user(form.cleaned_data['email'].lower(), settings.ANY_PASSWORD) + user = None + auth.login(self.request, auth_user) + auth_user.first_name = form.cleaned_data['name'] + auth_user.save() + if user is None: + user = models.User( + auth_user=auth_user, + ) + user.phone = form.cleaned_data['phone'] + _update_user_subscriptions(user, form.cleaned_data['subscribe']) + user.save() + subject_dict = { + 'name': auth_user.first_name, + 'dog': self.dog.get_name(), + } + if 'for_breeding' == self.dog.status: + subject_dict['action'] = 'breed with' + elif 'fertile' == self.dog.status: + subject_dict['action'] = 'take a puppy from' + else: + subject_dict['action'] = 'buy' + mail.mail_managers( + '%(name)s wants to %(action)s %(dog)s' % subject_dict, + 'Email: %s\nDog\'s page: http://kapustkapust.ru%s\nPhone: %s' % (auth_user.email, self.dog.get_url(), user.phone), + ) + return self.render_to_response(self.get_context_data(success=True)) + + +class Sitemap(sitemaps.CMSSitemap): + def items(self): + pages = super(Sitemap, self).items() + return pages.exclude(reverse_id='common') diff --git a/instabot_runner.py b/instabot_runner.py new file mode 100755 index 0000000..f3a231c --- /dev/null +++ b/instabot_runner.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from instabot.instabot import main + +if __name__ == '__main__': + main() diff --git a/instagram.py b/instagram.py deleted file mode 100644 index 4526254..0000000 --- a/instagram.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -import mechanize -import urllib - -BASE_URL = 'https://www.instagram.com/' - -class APIError(Exception): - def __init__(self, message, error): - self.message = message + ' ' + str(error) - self.status_code = error.getcode() - -class Client(object): - def __init__(self): - self._cookiejar = mechanize.CookieJar() - self._browser = mechanize.Browser() - self._browser.set_cookiejar(self._cookiejar) - - def _ajax(self, url, data=None, referer=BASE_URL): - if isinstance(data, dict): - data = urllib.urlencode(data) - headers = { - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': referer, - 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:43.0) Gecko/20100101 Firefox/43.0', - 'X-CSRFToken': self._csrf_token, - 'X-Instagram-AJAX': '1', - 'X-Requested-With': 'XMLHttpRequest', - } - request = mechanize.Request( - BASE_URL + url, - data=data, - headers=headers, - ) - self._cookiejar.add_cookie_header(request) - try: - response = self._browser.open(request) - except Exception as e: - e = APIError('Error during making AJAX request.', e) - e.status_code = e.status_code - raise e - return response - - def _get_cookie(self, name): - for cookie in self._cookiejar: - if cookie.name == name: - return cookie.value - raise KeyError() - - def like(self, media_id): - self._ajax('web/likes/%s/like/' % media_id, data='') - - def login(self, login, password): - login_page_url = BASE_URL - response = self._browser.open(login_page_url) - self._update_csrf_token() - login_response = self._ajax('accounts/login/ajax/', referer=login_page_url, data={ - 'username': login, - 'password': password, - }) - self._update_csrf_token() # CSRF token is refreshed after login request. - - def _update_csrf_token(self): - self._csrf_token = self._get_cookie('csrftoken') - logging.debug('csrftoken is %s', self._csrf_token) diff --git a/requirements.txt b/requirements.txt index d4d2156..9845c51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ -mechanize >= 0.2.5 -PyYAML >= 3.11 -requests >= 2.7.0 +asyncio>=3.4.3,<4.0 +docopt>=0.6.2,<0.7 +PyYAML>=3.11 +peewee>=2.7.4 diff --git a/schedule.py b/schedule.py deleted file mode 100644 index 9be8e96..0000000 --- a/schedule.py +++ /dev/null @@ -1,63 +0,0 @@ -import re -import requests -import urllib - -WEBSTA_URL = 'http://websta.me/' - -class ScheduleError(Exception): - pass - -class Schedule(object): - def __init__(self, hashtags_filename, configuration, state): - self._configuration = configuration - self._hashtags = self._get_hashtags_from_file(hashtags_filename) - self._state = state - self._update_media_ids(self._get_current_hashtag()) - - def next(self): - media_id_index = self._state.increment('schedule_media_id_index', 0) - if media_id_index >= len(self._media_ids) or media_id_index >= self._configuration['PERHASHTAG']: - media_id_index = 0 - self._state['schedule_media_id_index'] = media_id_index - self._update_media_ids(self._get_next_hashtag()) - return self._media_ids[media_id_index] - - def _get_hashtags_from_file(self, filename): - ''' Function to read the hashtags from a users file if not wanting to parse the top 100. ''' - hashtags = [] - f = open(filename, 'rb') - # Strip newline character. - hashtags = [unicode(line, 'utf-8').strip() for line in open(filename)] - f.close() - return hashtags - - def _get_current_hashtag(self): - hashtag_index = self._state.get('schedule_hashtag_index', 0) - try: - hashtag = self._hashtags[hashtag_index] - except IndexError: - hashtag_index = 0 - try: - hashtag = self._hashtags[hashtag_index] - except IndexError: - raise ScheduleError('Hashtags array is empty.') - self._state['schedule_hashtag_index'] = hashtag_index - return hashtag - - def _get_next_hashtag(self): - hashtag_index = self._state.increment('schedule_hashtag_index', 0) - try: - hashtag = self._hashtags[hashtag_index] - except IndexError: - hashtag_index = 0 - try: - hashtag = self._hashtags[hashtag_index] - except IndexError: - raise ScheduleError('Hashtags array is empty.') - self._state['schedule_hashtag_index'] = hashtag_index - return hashtag - - def _update_media_ids(self, hashtag): - hashtag_url = WEBSTA_URL +'tag/' + urllib.quote(hashtag.encode('utf-8')) - response = requests.get(hashtag_url) - self._media_ids = re.findall('span class=\"like_count_(.*)\"', response.text) diff --git a/state.py b/state.py deleted file mode 100644 index 9f819e8..0000000 --- a/state.py +++ /dev/null @@ -1,46 +0,0 @@ -import yaml - -class State: - def __init__(self, path): - self._path = path - try: - f = open(path, 'rb') - except IOError: - self._data = {} - return - self._data = yaml.load(f) - f.close() - - def __delitem__(self, key, value): - del self._data[key] - self._save() - - def __getitem__(self, key): - return self._data[key] - - def __setitem__(self, key, value): - self._data[key] = value - self._save() - - def _save(self): - f = open(self._path, 'wb') - yaml.dump(self._data, f) - f.close() - - def increment(self, key, default_value=1): - try: - value = self._data[key] - except KeyError: - value = default_value - else: - value += 1 - self[key] = value - return value - - def get(self, key, default_value=None): - try: - value = self._data[key] - except KeyError: - value = default_value - self[key] = value - return value From 40d999f10ed4093400a29276459db2a1c60c4823 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Thu, 18 Feb 2016 22:57:21 +0300 Subject: [PATCH 10/48] Bot installation works properly. --- instabot/instabot.py | 12 +++-- instabot/instagram.py | 114 ++++++++++++++++++++++++++---------------- instabot/user.py | 2 +- 3 files changed, 79 insertions(+), 49 deletions(-) diff --git a/instabot/instabot.py b/instabot/instabot.py index 721b5f5..8d0503c 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -1,3 +1,4 @@ +import asyncio import datetime import logging import logging.config @@ -37,17 +38,18 @@ def install(client, configuration, db): was_followed_at = now - datetime.timedelta(hours=configuration.following_hours) User.create( following_depth=0, - instagram_id=instagram_id, + instagram_id=client.id, was_followed_at=was_followed_at, # To prevent attempts to follow user by himself. - were_followers_fetched=True, ) - for follower_id in asyncio.run_until_complete(client.get_followers()): + loop = asyncio.get_event_loop() + for followed_id in loop.run_until_complete(client.get_followed(client.id)): User.create( - following_depth=1, - instagram_id=follower_id, + following_depth=0, + instagram_id=followed_id, is_followed=True, was_followed_at=was_followed_at, ) + LOGGER.info('Followed users were saved in DB') def main(): arguments = docopt(DOC, version=__version__) diff --git a/instabot/instagram.py b/instabot/instagram.py index 088ba5a..39bba67 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -1,42 +1,48 @@ import aiohttp import asyncio import logging +import json +import re from .errors import APIError, APILimitError API_URL = 'https://api.instagram.com/v1/' BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot') +USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:44.0) Gecko/20100101 Firefox/44.0' class Client(object): def __init__(self, configuration): self._client_id = configuration.instagram_client_id - #self._cookiejar = mechanize.CookieJar() - #self._browser = mechanize.Browser() - #self._browser.set_cookiejar(self._cookiejar) self._login = configuration.instagram_login self._password = configuration.instagram_password + self._referer = None self._session = aiohttp.ClientSession() loop = asyncio.get_event_loop() loop.run_until_complete(self._do_login()) @asyncio.coroutine - def _ajax(self, url, params=None, referer=None): + def _ajax(self, url, data=None): headers = { + 'Accept': '*/*', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US,en;q=0.7,ru;q=0.3', + 'Connection': 'keep-alive', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:43.0) Gecko/20100101 Firefox/43.0', + 'User-Agent': USER_AGENT, 'X-CSRFToken': self._csrf_token, 'X-Instagram-AJAX': '1', 'X-Requested-With': 'XMLHttpRequest', } - if referer is not None: - headers['Referer'] = referer + if self._referer is not None: + headers['Referer'] = self._referer + url = BASE_URL + url try: - response = yield from self._session.get( - BASE_URL + url, - params=params, + response = yield from self._session.post( + url, + data=data, headers=headers, ) - response = yield from response.json() + response = yield from response.text() except Exception as e: raise self._build_error(e) return response @@ -48,53 +54,75 @@ def _build_error(self, e): return APIError(e) @asyncio.coroutine - def get_followers(self, user): - response = yield from self._get_followers(user=user) - for follower in self._parse_followers(response): - yield follower + def _do_login(self): + yield from self._open(BASE_URL) + self._update_csrf_token() + login_response = yield from self._ajax('accounts/login/ajax/', data={ + 'username': self._login, + 'password': self._password, + }) + yield from self._update_id() + + @asyncio.coroutine + def get_followed(self, user): + url = '{0}users/{1}/follows?client_id={2}'.format(API_URL, user, self._client_id) + response = yield from aiohttp.get(url) + response = yield from response.json() + followed = self._parse_followed(response) next_url = response['pagination'].get('next_url') while next_url: yield from asyncio.sleep(.7) - response = yield from self._get_followers(url=next_url) - for follower in self._parse_followers(response): - yield follower + response = yield from aiohttp.get(next_url) + response = yield from response.json() + followed.extend(self._parse_followed(response)) next_url = response['pagination'].get('next_url') + LOGGER.debug('%d followed users were fetched.', len(followed)) + return followed @asyncio.coroutine def get_some_followers(self, user): - response = yield from self._get_followers(user) - return self._parse_followers(response) - - @asyncio.coroutine - def _get_followers(self, user=None, url=None): - if url is None: - url = '{0}users/{1}/followed-by?client_id={2}'.format(API_URL, user, self._client_id) + url = '{0}users/{1}/followed-by?client_id={2}'.format(API_URL, user, self._client_id) response = yield from aiohttp.get(url) response = yield from response.json() - return response - - @asyncio.coroutine - def _open(self, url): - response = yield from self._session.get(url) - yield from response.text() - - def _parse_followers(self, response): - return [follower['id'] for follower in response['params']] + followers = self._parse_followed(response) + LOGGER.debug('%d followers users were fetched.', len(followers)) + return followers @asyncio.coroutine def like(self, media): - yield from self._ajax('web/likes/%s/like/' % media, params='') + yield from self._ajax('web/likes/{0}/like/'.format(media)) @asyncio.coroutine - def _do_login(self): - login_page_url = BASE_URL - response = yield from self._open(login_page_url) - self._update_csrf_token() - login_response = yield from self._ajax('accounts/login/ajax/', referer=login_page_url, params={ - 'username': self._login, - 'password': self._password, - }) + def _open(self, url): + headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US,en;q=0.7,ru;q=0.3', + 'Connection': 'keep-alive', + 'DNT': '1', + 'User-Agent': USER_AGENT, + } + if self._referer is not None: + headers['Referer'] = self._referer + response = yield from self._session.get(url) + self._referer = url + return (yield from response.text()) + + def _parse_followed(self, response): + return [follower['id'] for follower in response['data']] def _update_csrf_token(self): self._csrf_token = self._session.cookies['csrftoken'].value LOGGER.debug('API. CSRF token is %s', self._csrf_token) + + @asyncio.coroutine + def _update_id(self): + response = yield from self._open(BASE_URL) + match = re.search( + '\\s*window\\._sharedData\\s*=\\s*' \ + '([^<]*(<(?!/script>)[^<]*)*)\\s*;\\s*', + response, + re.DOTALL, + ) + response = json.loads(match.group(1)) + self.id = response['config']['viewer']['id'] diff --git a/instabot/user.py b/instabot/user.py index 42761cf..7299f2b 100644 --- a/instabot/user.py +++ b/instabot/user.py @@ -12,7 +12,7 @@ class User(Model): created = DateTimeField(default=datetime.datetime.utcnow) class Meta: - db = database_proxy + database = database_proxy indexes = ( (('is_followed', 'was_followed_at'), False), (('were_followers_fetched', 'following_depth', 'created'), False) From f395b9e8d18ecfb22745f4e7b215f1377658166d Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Fri, 19 Feb 2016 18:03:59 +0300 Subject: [PATCH 11/48] UserService is working. --- instabot/errors.py | 16 ++++++--- instabot/instabot.py | 11 +++---- instabot/instagram.py | 70 +++++++++++++++++++++++++++------------- instabot/user_service.py | 50 +++++++++++++++++++++------- 4 files changed, 101 insertions(+), 46 deletions(-) diff --git a/instabot/errors.py b/instabot/errors.py index 3453e9b..2e26b8d 100644 --- a/instabot/errors.py +++ b/instabot/errors.py @@ -1,9 +1,17 @@ class APIError(Exception): - def __init__(self, message, error): - self.message = message + ' ' + str(error) - self.status_code = error.getcode() + def __init__(self, response): + super(APIError, self).__init__( + '{0} ({1}): {2}'.format( + response['meta']['code'], + response['meta']['error_type'], + response['meta']['error_message'], + ) + ) -class APILimitError(Exception): +class APILimitError(APIError): + pass + +class APINotAllowedError(APIError): pass class DBError(Exception): diff --git a/instabot/instabot.py b/instabot/instabot.py index 8d0503c..61a3b16 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -26,10 +26,7 @@ Arguments: CONFIGURATION Path to configuration.yml file. ''' -INSTAGRAM_API = 'https://api.instagram.com/v1' LOGGER = logging.getLogger('instabot') -USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) ' \ - 'Chrome/23.0.1271.64 Safari/537.11' __version__ = '0.2' def install(client, configuration, db): @@ -67,9 +64,9 @@ def main(): install(client, configuration, db) else: LOGGER.info('Executing InstaBot') - run(clientconfiguration) + run(client, configuration) -def run(configuration): +def run(client, configuration): loop = asyncio.get_event_loop() stats_service = StatsService() @@ -77,7 +74,7 @@ def run(configuration): user_service = UserService(client) loop.create_task(user_service.run()) - + following_service = FollowingService(client) loop.create_task(following_service.run()) @@ -86,5 +83,5 @@ def run(configuration): like_service = LikeService(client, media_service) loop.create_task(like_service.run()) - + loop.run_forever() diff --git a/instabot/instagram.py b/instabot/instagram.py index 39bba67..3a15dd8 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -3,9 +3,8 @@ import logging import json import re -from .errors import APIError, APILimitError +from .errors import APIError, APILimitError, APINotAllowedError -API_URL = 'https://api.instagram.com/v1/' BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot') USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:44.0) Gecko/20100101 Firefox/44.0' @@ -36,22 +35,42 @@ def _ajax(self, url, data=None): if self._referer is not None: headers['Referer'] = self._referer url = BASE_URL + url - try: - response = yield from self._session.post( - url, - data=data, - headers=headers, - ) - response = yield from response.text() - except Exception as e: - raise self._build_error(e) + response = yield from self._session.post( + url, + data=data, + headers=headers, + ) + response = yield from response.text() + return response + + @asyncio.coroutine + def _api(self, path): + ''' + @raise APIError + ''' + response = yield from aiohttp.get( + 'https://api.instagram.com/v1/{0}'.format(path), + params={ + 'client_id': self._client_id, + }, + ) + response = yield from response.json() + self._check_response(response) return response - def _build_error(self, e): - if e.getcode() in (403, 429): - return APILimitError() + def _check_response(self, response): + ''' + @raise APIError + ''' + code = response['meta']['code'] + if code == 200: + return + elif code == 400: + raise APINotAllowedError(response) + elif code in (403, 429): + raise APILimitError(response) else: - return APIError(e) + raise APIError(response) @asyncio.coroutine def _do_login(self): @@ -65,9 +84,10 @@ def _do_login(self): @asyncio.coroutine def get_followed(self, user): - url = '{0}users/{1}/follows?client_id={2}'.format(API_URL, user, self._client_id) - response = yield from aiohttp.get(url) - response = yield from response.json() + ''' + @raise APIError + ''' + response = yield from self._api('users/{0}/follows'.format(user)) followed = self._parse_followed(response) next_url = response['pagination'].get('next_url') while next_url: @@ -81,11 +101,11 @@ def get_followed(self, user): @asyncio.coroutine def get_some_followers(self, user): - url = '{0}users/{1}/followed-by?client_id={2}'.format(API_URL, user, self._client_id) - response = yield from aiohttp.get(url) - response = yield from response.json() + ''' + @raise APIError + ''' + response = yield from self._api('users/{0}/followed-by'.format(user)) followers = self._parse_followed(response) - LOGGER.debug('%d followers users were fetched.', len(followers)) return followers @asyncio.coroutine @@ -109,7 +129,11 @@ def _open(self, url): return (yield from response.text()) def _parse_followed(self, response): - return [follower['id'] for follower in response['data']] + try: + response = response['data'] + except KeyError: + raise APIError() + return [follower['id'] for follower in response] def _update_csrf_token(self): self._csrf_token = self._session.cookies['csrftoken'].value diff --git a/instabot/user_service.py b/instabot/user_service.py index 243370c..d679aec 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -1,5 +1,7 @@ import asyncio import logging +import peewee +from .errors import APIError, APILimitError, APINotAllowedError from .stats_service import StatsService from .user import User @@ -14,12 +16,13 @@ def __init__(self, client): @asyncio.coroutine def run(self): while True: + LOGGER.debug('UserService cycle') try: yield from self._ensure_enough_users() - except InstagramLimitError as e: + except APILimitError as e: LOGGER.debug('Fetching users. Instagram limits were reached: %s', e) yield from asyncio.sleep(60) - except Exception as e: + except IOError as e: LOGGER.debug('Fetching users. Some troubles: %s', e) yield from asyncio.sleep(5) else: @@ -27,7 +30,8 @@ def run(self): @asyncio.coroutine def _ensure_enough_users(self): - users_to_follow_count = len(User.select().where(User.subscribed_at == None)) + users_to_follow_count = User.select().where(User.was_followed_at == None).count() + LOGGER.debug('{0} users to follow found'.format(users_to_follow_count)) if users_to_follow_count < USERS_LIMIT: last_users_to_follow_count = users_to_follow_count for user in User.select().where(User.were_followers_fetched == False).order_by( @@ -35,16 +39,38 @@ def _ensure_enough_users(self): User.created, ): following_depth = user.following_depth + 1 - for follower_id in (yield from client.get_some_followers(user.instagram_id)): - follower, created = User.get_or_create(instagram_id=follower_id) - if created: - follower.following_depth = following_depth - follower.save() + try: + followers = yield from self._client.get_some_followers(user.instagram_id) + except APINotAllowedError as e: + LOGGER.debug( + 'Fetching users. Can\'t fetch followers of {0}: {1}'.format( + user.instagram_id, + e, + ), + ) + user.were_followers_fetched = True + user.save() + yield from asyncio.sleep(.7) + continue + user.were_followers_fetched = True + user.save() + LOGGER.debug( + 'Fetching users. {0} followers of {1} were fetched'.format( + len(followers), + user.instagram_id, + ), + ) + for follower_id in followers: + try: + User.create( + instagram_id=follower_id, + following_depth=following_depth, + ) + except peewee.IntegrityError: + pass + else: users_to_follow_count += 1 - STATS_SERVICE.increment('users_to_follow_fetched') - elif follower.following_depth > following_depth: - follower.following_depth = following_depth - follower.save() + self._stats_service.increment('users_to_follow_fetched') if users_to_follow_count >= USERS_LIMIT: break LOGGER.debug( From 4e29c6a99a230de936e4ca2457ec09320fadc643 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Sun, 21 Feb 2016 01:53:43 +0300 Subject: [PATCH 12/48] FollowingService is working. --- instabot/configuration.py | 4 +- instabot/errors.py | 17 ++- instabot/following_service.py | 36 +++--- instabot/instabot.py | 13 ++- instabot/instagram.py | 200 ++++++++++++++++++++++------------ instabot/user.py | 8 +- instabot/user_service.py | 36 +++--- 7 files changed, 192 insertions(+), 122 deletions(-) diff --git a/instabot/configuration.py b/instabot/configuration.py index 5819117..96d5404 100644 --- a/instabot/configuration.py +++ b/instabot/configuration.py @@ -18,12 +18,12 @@ def __init__(self, filename): self.db_password = configuration['db']['password'] self.following_hours = configuration['following_hours'] self.instagram_client_id = configuration['credentials']['client_id'] - self.instagram_login = configuration['credentials']['login'] + self.instagram_username = configuration['credentials']['username'] self.instagram_password = configuration['credentials']['password'] self.logging = configuration['logging'] self.hashtags = configuration['hashtags'] except KeyError as e: - sys.exit('Configuration is not fully specified: %s' % e) + sys.exit('Configuration is not fully specified. {0} is missed.'.format(e)) try: self.following_hours = int(self.following_hours) except ValueError as e: diff --git a/instabot/errors.py b/instabot/errors.py index 2e26b8d..625cdf0 100644 --- a/instabot/errors.py +++ b/instabot/errors.py @@ -1,17 +1,14 @@ class APIError(Exception): - def __init__(self, response): - super(APIError, self).__init__( - '{0} ({1}): {2}'.format( - response['meta']['code'], - response['meta']['error_type'], - response['meta']['error_message'], - ) - ) + def __init__(self, code): + super(APIError, self).__init__(str(code)) -class APILimitError(APIError): +class APIJSONError(Exception): pass -class APINotAllowedError(APIError): +class APILimitError(APIJSONError): + pass + +class APINotAllowedError(APIJSONError): pass class DBError(Exception): diff --git a/instabot/following_service.py b/instabot/following_service.py index f1ccf20..8f88b1e 100644 --- a/instabot/following_service.py +++ b/instabot/following_service.py @@ -1,39 +1,49 @@ import asyncio import datetime import logging -from .errors import APILimitError +from .errors import APIError, APILimitError from .user import User -LOGGER = logging.getLogger('instabot') +LOGGER = logging.getLogger('instabot.following_service') class FollowingService: - LONG_AGO_TIMEDELTA = datetime.timedelta(days=5) - - def __init__(self, client): + def __init__(self, client, configuration): self._client = client + self._following_timedelta = datetime.timedelta(hours=configuration.following_hours) @asyncio.coroutine def run(self): while True: + LOGGER.debug('Cycle') try: yield from self._unfollow() yield from self._follow() except APILimitError as e: - LOGGER.debug('Instagram limit was reached during following: %s', e) - yield from asyncio.sleep(60) + LOGGER.debug(e) + yield from asyncio.sleep(30) + except APIError as e: + LOGGER.debug(e) + yield from asyncio.sleep(10) else: yield from asyncio.sleep(10) @asyncio.coroutine def _follow(self): for user in User.select().where(User.was_followed_at == None).order_by( - User.friending_depth.desc(), - User.friends_fetched.desc(), + User.following_depth, + User.created, ): - self._client.follow(user) + yield from self._client.follow(user) + user.is_followed = True + user.was_followed_at = datetime.datetime.utcnow() + user.save() @asyncio.coroutine def _unfollow(self): - long_ago = datetime.datetime.utcnow() - LONG_AGO_TIMEDELTA - for user in User.select().where((User.is_followed == True) & (User.was_followed_at < long_ago)): - self._client.unfollow(user) + unfollowing_threshold = datetime.datetime.utcnow() - self._following_timedelta + for user in User.select().where( + (User.is_followed == True) & (User.was_followed_at <= unfollowing_threshold), + ): + yield from self._client.unfollow(user) + user.is_followed = False + user.save() diff --git a/instabot/instabot.py b/instabot/instabot.py index 61a3b16..ab1b340 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -33,20 +33,23 @@ def install(client, configuration, db): db.create_tables([User]) now = datetime.datetime.utcnow() was_followed_at = now - datetime.timedelta(hours=configuration.following_hours) - User.create( + user = User.create( following_depth=0, instagram_id=client.id, + username=configuration.instagram_username, was_followed_at=was_followed_at, # To prevent attempts to follow user by himself. ) loop = asyncio.get_event_loop() - for followed_id in loop.run_until_complete(client.get_followed(client.id)): + followed_users_json = loop.run_until_complete(client.get_followed(user)) + for followed_json in followed_users_json: User.create( following_depth=0, - instagram_id=followed_id, + instagram_id=followed_json['id'], is_followed=True, + username=followed_json['username'], was_followed_at=was_followed_at, ) - LOGGER.info('Followed users were saved in DB') + LOGGER.info('{0} followed users were saved in DB'.format(len(followed_users_json))) def main(): arguments = docopt(DOC, version=__version__) @@ -75,7 +78,7 @@ def run(client, configuration): user_service = UserService(client) loop.create_task(user_service.run()) - following_service = FollowingService(client) + following_service = FollowingService(client, configuration) loop.create_task(following_service.run()) media_service = MediaService(configuration) diff --git a/instabot/instagram.py b/instabot/instagram.py index 3a15dd8..f6cb631 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -3,37 +3,48 @@ import logging import json import re -from .errors import APIError, APILimitError, APINotAllowedError +from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError BASE_URL = 'https://www.instagram.com/' -LOGGER = logging.getLogger('instabot') +LOGGER = logging.getLogger('instabot.instagram') USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:44.0) Gecko/20100101 Firefox/44.0' class Client(object): def __init__(self, configuration): self._client_id = configuration.instagram_client_id - self._login = configuration.instagram_login + self._username = configuration.instagram_username self._password = configuration.instagram_password - self._referer = None - self._session = aiohttp.ClientSession() + self._referer = BASE_URL + self._session = aiohttp.ClientSession( + cookies={ + 'ig_pr': '1', + 'ig_vw': '1280', + }, + headers={ + 'User-Agent': USER_AGENT, + 'X-Instagram-AJAX': '1', + 'X-Requested-With': 'XMLHttpRequest', + }, + ) + self._anonymous_session = aiohttp.ClientSession( + headers={ + 'User-Agent': USER_AGENT, + }, + ) loop = asyncio.get_event_loop() loop.run_until_complete(self._do_login()) @asyncio.coroutine - def _ajax(self, url, data=None): + def _ajax(self, url, data=None, referer=None): + ''' + @raise APIError + ''' + if referer is not None: + self._referer = referer headers = { - 'Accept': '*/*', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US,en;q=0.7,ru;q=0.3', - 'Connection': 'keep-alive', - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'User-Agent': USER_AGENT, + 'Referer': self._referer, 'X-CSRFToken': self._csrf_token, - 'X-Instagram-AJAX': '1', - 'X-Requested-With': 'XMLHttpRequest', - } - if self._referer is not None: - headers['Referer'] = self._referer + } url = BASE_URL + url response = yield from self._session.post( url, @@ -41,60 +52,105 @@ def _ajax(self, url, data=None): headers=headers, ) response = yield from response.text() + try: + response = json.loads(response) + except ValueError as e: + if 'too many requests' in response: + raise APILimitError('Too many AJAX requests to {0}'.format(url)) + raise APIError( + 'AJAX request to {0} is not JSON: {1} Response: {2}'.format(url, e, response), + ) + if response.get('status') != 'ok': + raise APIError('AJAX request to {0} is not OK: {1}'.format(url, response)) + yield from asyncio.sleep(1.5) return response @asyncio.coroutine - def _api(self, path): + def _api(self, path=None, url=None): ''' + @raise APIJSONError @raise APIError ''' - response = yield from aiohttp.get( - 'https://api.instagram.com/v1/{0}'.format(path), + if url is None: + url = 'https://api.instagram.com/v1/{0}'.format(path) + response = yield from self._anonymous_session.get( + url, params={ 'client_id': self._client_id, }, ) - response = yield from response.json() - self._check_response(response) + response = yield from response.text() + try: + response = json.loads(response) + except ValueError as e: + raise APIError('Bad response for {0}: {1} Response: {2}'.format(url, e, response)) + self._check_api_response(response) + yield from asyncio.sleep(1.5) return response - def _check_response(self, response): + def _check_api_response(self, response): ''' - @raise APIError + @raise APIJSONError ''' code = response['meta']['code'] if code == 200: return - elif code == 400: - raise APINotAllowedError(response) + message = '{0} ({1}): {2}'.format( + code, + response['meta']['error_type'], + response['meta']['error_message'], + ) + if code == 400: + raise APINotAllowedError(message) elif code in (403, 429): - raise APILimitError(response) + raise APILimitError(message) else: - raise APIError(response) + raise APIJSONError(message) @asyncio.coroutine def _do_login(self): + ''' + @raise APIError + ''' yield from self._open(BASE_URL) self._update_csrf_token() - login_response = yield from self._ajax('accounts/login/ajax/', data={ - 'username': self._login, - 'password': self._password, - }) - yield from self._update_id() + yield from self._ajax( + 'accounts/login/ajax/', + data={ + 'username': self._username, + 'password': self._password, + }, + ) + self._update_csrf_token() + self.id = self._session.cookies['ds_user_id'].value @asyncio.coroutine - def get_followed(self, user): + def follow(self, user): ''' + @raise APIJSONError @raise APIError ''' - response = yield from self._api('users/{0}/follows'.format(user)) - followed = self._parse_followed(response) + try: + yield from self._ajax( + 'web/friendships/{0}/follow/'.format(user), + referer=user.get_url(), + ) + except (APIError, APILimitError) as e: + raise APIError('Troubles during following {0}: {1}'.format(user.instagram_id, e)) + else: + LOGGER.debug('{0} was followed.'.format(user.username)) + + @asyncio.coroutine + def get_followed(self, user): + ''' + @raise APIJSONError + ''' + response = yield from self._api('users/{0}/follows'.format(user.instagram_id)) + followed = response['data'] next_url = response['pagination'].get('next_url') while next_url: - yield from asyncio.sleep(.7) - response = yield from aiohttp.get(next_url) - response = yield from response.json() - followed.extend(self._parse_followed(response)) + response = yield from self._api(url=next_url) + followed.extend(response['data']) next_url = response['pagination'].get('next_url') LOGGER.debug('%d followed users were fetched.', len(followed)) return followed @@ -102,51 +158,51 @@ def get_followed(self, user): @asyncio.coroutine def get_some_followers(self, user): ''' - @raise APIError + @raise APIJSONError ''' - response = yield from self._api('users/{0}/followed-by'.format(user)) - followers = self._parse_followed(response) + response = yield from self._api('users/{0}/followed-by'.format(user.instagram_id)) + followers = response['data'] return followers @asyncio.coroutine def like(self, media): - yield from self._ajax('web/likes/{0}/like/'.format(media)) + try: + yield from self._ajax('web/likes/{0}/like/'.format(media)) + except (APIError, APILimitError) as e: + raise APIError('Troubles during liking {0}: {1}'.format(user.instagram_id, e)) + else: + LOGGER.debug('Liked {0}'.format(media)) @asyncio.coroutine def _open(self, url): headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US,en;q=0.7,ru;q=0.3', - 'Connection': 'keep-alive', - 'DNT': '1', - 'User-Agent': USER_AGENT, - } - if self._referer is not None: - headers['Referer'] = self._referer - response = yield from self._session.get(url) + 'Referer': self._referer, + } + response = yield from self._session.get(url, headers=headers) self._referer = url - return (yield from response.text()) + response = yield from response.text() + return response - def _parse_followed(self, response): + @asyncio.coroutine + def unfollow(self, user): + ''' + @raise APIJSONError + @raise APIError + ''' try: - response = response['data'] - except KeyError: - raise APIError() - return [follower['id'] for follower in response] + response = yield from self._ajax( + 'web/friendships/{0}/unfollow/'.format(user.instagram_id), + referer=user.get_url(), + ) + except APILimitError as e: + raise APILimitError( + 'API limit was reached during unfollowing {0}: {1}'.format(user.username, e), + ) + except APIError as e: + raise APIError('API troubles during unfollowing {0}: {1}'.format(user.username, e)) + else: + LOGGER.debug('{0} was unfollowed.'.format(user.username)) def _update_csrf_token(self): self._csrf_token = self._session.cookies['csrftoken'].value LOGGER.debug('API. CSRF token is %s', self._csrf_token) - - @asyncio.coroutine - def _update_id(self): - response = yield from self._open(BASE_URL) - match = re.search( - '\\s*window\\._sharedData\\s*=\\s*' \ - '([^<]*(<(?!/script>)[^<]*)*)\\s*;\\s*', - response, - re.DOTALL, - ) - response = json.loads(match.group(1)) - self.id = response['config']['viewer']['id'] diff --git a/instabot/user.py b/instabot/user.py index 7299f2b..23c2044 100644 --- a/instabot/user.py +++ b/instabot/user.py @@ -4,12 +4,13 @@ database_proxy = Proxy() class User(Model): + created = DateTimeField(default=datetime.datetime.utcnow) + following_depth = IntegerField() instagram_id = CharField(max_length=20, unique=True) is_followed = BooleanField(default=False) + username = CharField(max_length=60) was_followed_at = DateTimeField(null=True) were_followers_fetched = BooleanField(default=False) - following_depth = IntegerField() - created = DateTimeField(default=datetime.datetime.utcnow) class Meta: database = database_proxy @@ -17,3 +18,6 @@ class Meta: (('is_followed', 'was_followed_at'), False), (('were_followers_fetched', 'following_depth', 'created'), False) ) + + def get_url(self): + return 'https://www.instagram.com/{0}/'.format(self.username) diff --git a/instabot/user_service.py b/instabot/user_service.py index d679aec..6bbcf57 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -5,8 +5,8 @@ from .stats_service import StatsService from .user import User -LOGGER = logging.getLogger('instabot') -USERS_LIMIT = 1000 +LOGGER = logging.getLogger('instabot.user_service') +USERS_TO_FOLLOW_COUNT_MIN = 1000 class UserService: def __init__(self, client): @@ -16,23 +16,23 @@ def __init__(self, client): @asyncio.coroutine def run(self): while True: - LOGGER.debug('UserService cycle') + LOGGER.debug('Cycle') try: yield from self._ensure_enough_users() except APILimitError as e: - LOGGER.debug('Fetching users. Instagram limits were reached: %s', e) + LOGGER.debug('Instagram limits were reached: %s', e) yield from asyncio.sleep(60) except IOError as e: - LOGGER.debug('Fetching users. Some troubles: %s', e) + LOGGER.warning(e) yield from asyncio.sleep(5) else: - yield from asyncio.sleep(5) + yield from asyncio.sleep(60) @asyncio.coroutine def _ensure_enough_users(self): users_to_follow_count = User.select().where(User.was_followed_at == None).count() LOGGER.debug('{0} users to follow found'.format(users_to_follow_count)) - if users_to_follow_count < USERS_LIMIT: + if users_to_follow_count < USERS_TO_FOLLOW_COUNT_MIN: last_users_to_follow_count = users_to_follow_count for user in User.select().where(User.were_followers_fetched == False).order_by( User.following_depth, @@ -40,40 +40,40 @@ def _ensure_enough_users(self): ): following_depth = user.following_depth + 1 try: - followers = yield from self._client.get_some_followers(user.instagram_id) + followers_json = yield from self._client.get_some_followers(user) except APINotAllowedError as e: LOGGER.debug( - 'Fetching users. Can\'t fetch followers of {0}: {1}'.format( - user.instagram_id, + 'Can\'t fetch followers of {0}: {1}'.format( + user.username, e, ), ) user.were_followers_fetched = True user.save() - yield from asyncio.sleep(.7) continue user.were_followers_fetched = True user.save() LOGGER.debug( - 'Fetching users. {0} followers of {1} were fetched'.format( - len(followers), - user.instagram_id, + '{0} followers of {1} were fetched'.format( + len(followers_json), + user.username, ), ) - for follower_id in followers: + for follower_json in followers_json: try: User.create( - instagram_id=follower_id, + instagram_id=follower_json['id'], following_depth=following_depth, + username=follower_json['username'], ) except peewee.IntegrityError: pass else: users_to_follow_count += 1 self._stats_service.increment('users_to_follow_fetched') - if users_to_follow_count >= USERS_LIMIT: + if users_to_follow_count >= USERS_TO_FOLLOW_COUNT_MIN: break LOGGER.debug( - 'Fetching users. %d users fetched.', + '%d users fetched.', users_to_follow_count - last_users_to_follow_count, ) From 9e7c91f1806f3dbecccc1f36fba18fc7f479881b Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Mon, 22 Feb 2016 20:08:17 +0300 Subject: [PATCH 13/48] LikeService is working. --- README.md | 61 +++++++++----- instabot/configuration.py | 22 +++-- instabot/errors.py | 10 ++- instabot/following_service.py | 44 ++++++++-- instabot/instabot.py | 39 +++++---- instabot/instagram.py | 150 ++++++++++++++++++++++++++-------- instabot/like_service.py | 37 +++++---- instabot/media_service.py | 39 ++++++--- instabot/stats_service.py | 11 ++- instabot/user.py | 2 +- instabot/user_service.py | 19 +++-- 11 files changed, 304 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index ede794f..4d791b2 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,31 @@ # Instagram Bot -A simple Instagram bot that cycles through hashtags listed at a file and automatically likes pictures with those hashtags to get more followers. +Instagram bot that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses followers of people you have followed. -## Setup +During installation process it saves people followed by you as "followed long time ago" and unfollows them at the first start. -At first, get the source. Clone this repository: +## Deployment - $ git clone https://github.com/quasiyoke/InstaBot.git + $ virtualenv --python=/usr/bin/python3 instabotenv + $ cd instabotenv + $ source bin/activate + (instabotenv) $ git clone https://github.com/quasiyoke/InstaBot.git + (instabotenv) $ pip install -r requirements.txt -### Requirements +Create MySQL DB: -You can install all needed requirements with single command: - - $ pip install -r requirements.txt - -### Configuration +```mysql +CREATE DATABASE IF NOT EXISTS instagram CHARACTER SET utf8 COLLATE utf8_general_ci; +CREATE USER instabot@localhost IDENTIFIED BY 'GT8H!b]5,9}A7'; +GRANT ALL ON instagram.* TO instabot@localhost; +``` -Create `configuration.yml` file containing your information, e.g.: +Create `configuration.yml` file containing your credentials, e.g.: ```yaml credentials: client_id: "1eac8774163c2fc938db3a0ee82a6873" - login: "your_login" + username: "your_username" password: "eKeFB2;AW6fS}z" db: host: "localhost" @@ -29,6 +33,16 @@ db: user: "instabot" password: "GT8H!b]5,9}A7" following_hours: 120 +hashtags: + - I + - люблю + - Python +instagram: + limit_sleep_time_coefficient: 1.3 + limit_sleep_time_min: 30 + success_sleep_time_coefficient: 0.5 + success_sleep_time_max: 6 + success_sleep_time_min: 4 logging: version: 1 formatters: @@ -40,24 +54,27 @@ logging: class: logging.StreamHandler level: DEBUG formatter: simple + loggers: + instabot: + level: DEBUG root: level: DEBUG handlers: - console -hashtags: - - I - - love - - Python +users_to_follow_cache_size: 300 ``` -Execute this at MySQL console: +Where: + +* `following_hours` -- how much users will be followed. +* `hashtags` -- list of hashtags to get photos to like. Optional. By default bot won't like anything. +* `logging` -- logging setup as described in [this howto](https://docs.python.org/3/howto/logging.html). +* `users_to_follow_cache_size` -- how much users should be fetched for following. The cache is being filled in once a minute. Optional. By default bot won't follow anybody. - CREATE DATABASE IF NOT EXISTS instagram CHARACTER SET utf8 COLLATE utf8_general_ci; - CREATE USER instabot@localhost IDENTIFIED BY 'GT8H!b]5,9}A7'; - GRANT ALL ON instagram.* TO instabot@localhost; +Create necessary DB tables: -## Launching + $ ./instabot_runner.py install configuration.yml Run: - $ ./instabot_runner.py + $ ./instabot_runner.py configuration.yml diff --git a/instabot/configuration.py b/instabot/configuration.py index 96d5404..5729c5d 100644 --- a/instabot/configuration.py +++ b/instabot/configuration.py @@ -18,15 +18,25 @@ def __init__(self, filename): self.db_password = configuration['db']['password'] self.following_hours = configuration['following_hours'] self.instagram_client_id = configuration['credentials']['client_id'] + self.instagram_limit_sleep_time_coefficient = \ + configuration['instagram']['limit_sleep_time_coefficient'] + self.instagram_limit_sleep_time_min = \ + configuration['instagram']['limit_sleep_time_min'] + self.instagram_success_sleep_time_coefficient = \ + configuration['instagram']['success_sleep_time_coefficient'] + self.instagram_success_sleep_time_max = \ + configuration['instagram']['success_sleep_time_max'] + self.instagram_success_sleep_time_min = \ + configuration['instagram']['success_sleep_time_min'] self.instagram_username = configuration['credentials']['username'] self.instagram_password = configuration['credentials']['password'] self.logging = configuration['logging'] - self.hashtags = configuration['hashtags'] - except KeyError as e: - sys.exit('Configuration is not fully specified. {0} is missed.'.format(e)) + except (KeyError, TypeError) as e: + sys.exit('Configuration is not fully specified. {} is missed.'.format(e)) + self.hashtags = configuration.get('hashtags', []) + self.users_to_follow_cache_size = configuration.get('users_to_follow_cache_size', 0) try: self.following_hours = int(self.following_hours) + self.users_to_follow_cache_size = int(self.users_to_follow_cache_size) except ValueError as e: - sys.exit('following_hours are specified wrong: %s' % e) - if len(self.hashtags) == 0: - sys.exit('Specify at least one hashtag, please') + sys.exit('Some integer value is specified wrong: {}'.format(e)) diff --git a/instabot/errors.py b/instabot/errors.py index 625cdf0..a983fe8 100644 --- a/instabot/errors.py +++ b/instabot/errors.py @@ -5,10 +5,16 @@ def __init__(self, code): class APIJSONError(Exception): pass -class APILimitError(APIJSONError): +class APILimitError(Exception): pass -class APINotAllowedError(APIJSONError): +class APINotAllowedError(Exception): + pass + +class APINotFoundError(Exception): + pass + +class ConfigurationError(Exception): pass class DBError(Exception): diff --git a/instabot/following_service.py b/instabot/following_service.py index 8f88b1e..7781bcb 100644 --- a/instabot/following_service.py +++ b/instabot/following_service.py @@ -1,7 +1,8 @@ import asyncio import datetime import logging -from .errors import APIError, APILimitError +from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError +from .stats_service import StatsService from .user import User LOGGER = logging.getLogger('instabot.following_service') @@ -10,40 +11,65 @@ class FollowingService: def __init__(self, client, configuration): self._client = client self._following_timedelta = datetime.timedelta(hours=configuration.following_hours) + self._stats_service = StatsService.get_instance() @asyncio.coroutine def run(self): while True: - LOGGER.debug('Cycle') try: yield from self._unfollow() yield from self._follow() except APILimitError as e: LOGGER.debug(e) - yield from asyncio.sleep(30) - except APIError as e: + except (APIError, APIJSONError) as e: LOGGER.debug(e) - yield from asyncio.sleep(10) + yield from asyncio.sleep(5) + except (IOError, OSError) as e: + LOGGER.warning(e) + yield from asyncio.sleep(5) else: yield from asyncio.sleep(10) @asyncio.coroutine def _follow(self): + ''' + @raise APIError + @raise APIJSONError + @raise APILimitError + ''' + unfollowing_threshold = datetime.datetime.utcnow() - self._following_timedelta for user in User.select().where(User.was_followed_at == None).order_by( User.following_depth, User.created, ): - yield from self._client.follow(user) - user.is_followed = True - user.was_followed_at = datetime.datetime.utcnow() + try: + yield from self._client.follow(user) + except (APINotAllowedError, APINotFoundError) as e: + LOGGER.debug('Can\'t follow {}. {}'.format(user.username, e)) + user.is_followed = False # Make user look like he was followed and was unfollowed already. + user.was_followed_at = unfollowing_threshold + else: + user.is_followed = True + user.was_followed_at = datetime.datetime.utcnow() + self._stats_service.increment('followed') user.save() @asyncio.coroutine def _unfollow(self): + ''' + @raise APIError + @raise APIJSONError + @raise APILimitError + ''' unfollowing_threshold = datetime.datetime.utcnow() - self._following_timedelta for user in User.select().where( (User.is_followed == True) & (User.was_followed_at <= unfollowing_threshold), ): - yield from self._client.unfollow(user) + try: + yield from self._client.unfollow(user) + except (APINotAllowedError, APINotFoundError) as e: + LOGGER.debug('Can\'t unfollow {}. {}'.format(user.username, e)) + else: + self._stats_service.increment('unfollowed') user.is_followed = False user.save() diff --git a/instabot/instabot.py b/instabot/instabot.py index ab1b340..a474a06 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -5,6 +5,7 @@ import sys from .configuration import Configuration from .db import get_db +from .errors import ConfigurationError from .following_service import FollowingService from .like_service import LikeService from .media_service import MediaService @@ -29,8 +30,9 @@ LOGGER = logging.getLogger('instabot') __version__ = '0.2' -def install(client, configuration, db): +def install(configuration, db): db.create_tables([User]) + client = instagram.Client(configuration) now = datetime.datetime.utcnow() was_followed_at = now - datetime.timedelta(hours=configuration.following_hours) user = User.create( @@ -60,31 +62,40 @@ def main(): logging.config.dictConfig(configuration.logging) db = get_db(configuration) - client = instagram.Client(configuration) if arguments['install']: LOGGER.info('Installing InstaBot') - install(client, configuration, db) + install(configuration, db) else: LOGGER.info('Executing InstaBot') - run(client, configuration) + run(configuration) -def run(client, configuration): +def run(configuration): loop = asyncio.get_event_loop() stats_service = StatsService() loop.create_task(stats_service.run()) - user_service = UserService(client) - loop.create_task(user_service.run()) - - following_service = FollowingService(client, configuration) - loop.create_task(following_service.run()) + following_client = instagram.Client(configuration) - media_service = MediaService(configuration) - loop.create_task(media_service.run()) + try: + user_service = UserService(following_client, configuration) + except ConfigurationError as e: + LOGGER.info('UserService wasn\'t started. {}'.format(e)) + else: + loop.create_task(user_service.run()) + + following_service = FollowingService(following_client, configuration) + loop.create_task(following_service.run()) - like_service = LikeService(client, media_service) - loop.create_task(like_service.run()) + try: + media_service = MediaService(configuration) + except ConfigurationError as e: + LOGGER.info('MediaService wasn\'t started. {}'.format(e)) + else: + loop.create_task(media_service.run()) + like_client = instagram.Client(configuration) + like_service = LikeService(like_client, media_service) + loop.create_task(like_service.run()) loop.run_forever() diff --git a/instabot/instagram.py b/instabot/instagram.py index f6cb631..0ebbbd3 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -1,9 +1,9 @@ -import aiohttp import asyncio import logging import json import re -from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError +from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError +from aiohttp import ClientSession BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot.instagram') @@ -12,10 +12,17 @@ class Client(object): def __init__(self, configuration): self._client_id = configuration.instagram_client_id + self._limit_sleep_time_coefficient = configuration.instagram_limit_sleep_time_coefficient + self._limit_sleep_time_min = configuration.instagram_limit_sleep_time_min + self._success_sleep_time_coefficient = configuration.instagram_success_sleep_time_coefficient + self._success_sleep_time_max = configuration.instagram_success_sleep_time_max + self._success_sleep_time_min = configuration.instagram_success_sleep_time_min + self._limit_sleep_time = self._limit_sleep_time_min + self._success_sleep_time = self._success_sleep_time_max self._username = configuration.instagram_username self._password = configuration.instagram_password self._referer = BASE_URL - self._session = aiohttp.ClientSession( + self._session = ClientSession( cookies={ 'ig_pr': '1', 'ig_vw': '1280', @@ -26,7 +33,7 @@ def __init__(self, configuration): 'X-Requested-With': 'XMLHttpRequest', }, ) - self._anonymous_session = aiohttp.ClientSession( + self._anonymous_session = ClientSession( headers={ 'User-Agent': USER_AGENT, }, @@ -38,6 +45,10 @@ def __init__(self, configuration): def _ajax(self, url, data=None, referer=None): ''' @raise APIError + @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APINotFoundError ''' if referer is not None: self._referer = referer @@ -51,28 +62,44 @@ def _ajax(self, url, data=None, referer=None): data=data, headers=headers, ) - response = yield from response.text() + if response.status == 404: + raise APINotFoundError('404 for {}'.format(url)) + text = yield from response.text() try: - response = json.loads(response) + response_json = json.loads(text) except ValueError as e: - if 'too many requests' in response: - raise APILimitError('Too many AJAX requests to {0}'.format(url)) - raise APIError( - 'AJAX request to {0} is not JSON: {1} Response: {2}'.format(url, e, response), - ) - if response.get('status') != 'ok': - raise APIError('AJAX request to {0} is not OK: {1}'.format(url, response)) - yield from asyncio.sleep(1.5) - return response + if 'too many requests' in text or 'temporarily blocked' in text: + yield from self._sleep_limit() + raise APILimitError('Too many AJAX requests. URL: {}'.format(url)) + message = 'AJAX request to {url} is not JSON: {error} ' \ + 'Response ({status}): \"{text}\"'.format( + url=url, + error=e, + status=response.status, + text=text, + response=response, + ), + if response.status == 200: + raise APIError(message) + elif response.status == 400: + raise APINotAllowedError(message) + else: + raise APIError(message) + if response_json.get('status') != 'ok': + raise APIError('AJAX request to {} is not OK: {}'.format(url, response_json)) + yield from self._sleep_success() + return response_json @asyncio.coroutine def _api(self, path=None, url=None): ''' @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError @raise APIError ''' if url is None: - url = 'https://api.instagram.com/v1/{0}'.format(path) + url = 'https://api.instagram.com/v1/{}'.format(path) response = yield from self._anonymous_session.get( url, params={ @@ -83,14 +110,17 @@ def _api(self, path=None, url=None): try: response = json.loads(response) except ValueError as e: - raise APIError('Bad response for {0}: {1} Response: {2}'.format(url, e, response)) - self._check_api_response(response) - yield from asyncio.sleep(1.5) + raise APIError('Bad response for {}: {} Response: {}'.format(url, e, response)) + yield from self._check_api_response(response) + yield from self._sleep_success() return response + @asyncio.coroutine def _check_api_response(self, response): ''' @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError ''' code = response['meta']['code'] if code == 200: @@ -103,6 +133,7 @@ def _check_api_response(self, response): if code == 400: raise APINotAllowedError(message) elif code in (403, 429): + yield from self._sleep_limit() raise APILimitError(message) else: raise APIJSONError(message) @@ -110,6 +141,9 @@ def _check_api_response(self, response): @asyncio.coroutine def _do_login(self): ''' + @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError @raise APIError ''' yield from self._open(BASE_URL) @@ -128,24 +162,34 @@ def _do_login(self): def follow(self, user): ''' @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APINotFoundError @raise APIError ''' try: yield from self._ajax( - 'web/friendships/{0}/follow/'.format(user), + 'web/friendships/{}/follow/'.format(user.instagram_id), referer=user.get_url(), ) - except (APIError, APILimitError) as e: - raise APIError('Troubles during following {0}: {1}'.format(user.instagram_id, e)) + except APILimitError as e: + raise APILimitError( + 'API limit was reached during following {}. {}'.format(user.username, e), + ) + except APIError as e: + raise APIError('API troubles during following {}. {}'.format(user.username, e)) else: - LOGGER.debug('{0} was followed.'.format(user.username)) + LOGGER.debug('{} was followed'.format(user.username)) @asyncio.coroutine def get_followed(self, user): ''' @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APIError ''' - response = yield from self._api('users/{0}/follows'.format(user.instagram_id)) + response = yield from self._api('users/{}/follows'.format(user.instagram_id)) followed = response['data'] next_url = response['pagination'].get('next_url') while next_url: @@ -159,19 +203,31 @@ def get_followed(self, user): def get_some_followers(self, user): ''' @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APIError ''' - response = yield from self._api('users/{0}/followed-by'.format(user.instagram_id)) + response = yield from self._api('users/{}/followed-by'.format(user.instagram_id)) followers = response['data'] return followers @asyncio.coroutine def like(self, media): + ''' + @raise APIError + @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APINotFoundError + ''' try: - yield from self._ajax('web/likes/{0}/like/'.format(media)) - except (APIError, APILimitError) as e: - raise APIError('Troubles during liking {0}: {1}'.format(user.instagram_id, e)) + yield from self._ajax('web/likes/{}/like/'.format(media)) + except APILimitError as e: + raise APILimitError( + 'API limit was reached during liking {}. {}'.format(media, e), + ) else: - LOGGER.debug('Liked {0}'.format(media)) + LOGGER.debug('Liked {}'.format(media)) @asyncio.coroutine def _open(self, url): @@ -183,26 +239,50 @@ def _open(self, url): response = yield from response.text() return response + @asyncio.coroutine + def relogin(self): + yield from self._session.close() + self._session.cookies.clear() + yield from self._do_login() + + @asyncio.coroutine + def _sleep_limit(self): + LOGGER.debug('Sleeping for {:.0f} sec because of API limits'.format(self._limit_sleep_time)) + yield from asyncio.sleep(self._limit_sleep_time) + self._limit_sleep_time *= self._limit_sleep_time_coefficient + + @asyncio.coroutine + def _sleep_success(self): + if self._limit_sleep_time != self._limit_sleep_time_min: + self._limit_sleep_time = self._limit_sleep_time_min + self._success_sleep_time = self._success_sleep_time_max + yield from asyncio.sleep(self._success_sleep_time) + self._success_sleep_time = self._success_sleep_time_min + (self._success_sleep_time - \ + self._success_sleep_time_min) * self._success_sleep_time_coefficient + @asyncio.coroutine def unfollow(self, user): ''' - @raise APIJSONError @raise APIError + @raise APIJSONError + @raise APILimitError + @raise APINotAllowedError + @raise APINotFoundError ''' try: response = yield from self._ajax( - 'web/friendships/{0}/unfollow/'.format(user.instagram_id), + 'web/friendships/{}/unfollow/'.format(user.instagram_id), referer=user.get_url(), ) except APILimitError as e: raise APILimitError( - 'API limit was reached during unfollowing {0}: {1}'.format(user.username, e), + 'API limit was reached during unfollowing {}. {}'.format(user.username, e), ) except APIError as e: - raise APIError('API troubles during unfollowing {0}: {1}'.format(user.username, e)) + raise APIError('API troubles during unfollowing {}. {}'.format(user.username, e)) else: - LOGGER.debug('{0} was unfollowed.'.format(user.username)) + LOGGER.debug('{} was unfollowed'.format(user.username)) def _update_csrf_token(self): self._csrf_token = self._session.cookies['csrftoken'].value - LOGGER.debug('API. CSRF token is %s', self._csrf_token) + LOGGER.debug('CSRF token is %s', self._csrf_token) diff --git a/instabot/like_service.py b/instabot/like_service.py index e710d4f..2b6a172 100644 --- a/instabot/like_service.py +++ b/instabot/like_service.py @@ -1,8 +1,9 @@ import asyncio import logging +from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError from .stats_service import StatsService -LOGGER = logging.getLogger('instabot') +LOGGER = logging.getLogger('instabot.like_service') class LikeService: def __init__(self, client, media_service): @@ -12,21 +13,21 @@ def __init__(self, client, media_service): @asyncio.coroutine def run(self): + media = yield from self._media_service.pop() while True: - media = yield from self._media_service.pop() - while True: - try: - yield from self._client.like(media) - except instagram.APIError as e: - status_code = int(e.status_code) - if status_code in (403, 429): - LOGGER.debug('Instagram limits reached during liking: %s', e) - yield from asyncio.sleep(60) - else: - LOGGER.debug('Something went wrong during liking: %s', e) - yield from asyncio.sleep(5) - else: - LOGGER.debug('Liked %s', media) - self._stats_service.increment('liked') - yield from asyncio.sleep(.7) - break + try: + yield from self._client.like(media) + except APILimitError as e: + LOGGER.debug(e) + except (APIError, APIJSONError) as e: + LOGGER.debug(e) + yield from asyncio.sleep(5) + except (APINotAllowedError, APINotFoundError) as e: + LOGGER.debug('Can\'t like {}. {}'.format(media, str(e))) + media = yield from self._media_service.pop() + except (IOError, OSError) as e: + LOGGER.warning(e) + yield from asyncio.sleep(5) + else: + media = yield from self._media_service.pop() + self._stats_service.increment('liked') diff --git a/instabot/media_service.py b/instabot/media_service.py index eb58352..c13f29f 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -1,10 +1,13 @@ -import aiohttp import asyncio import itertools +import logging import re -import urllib +import urllib.parse +from .errors import ConfigurationError +from aiohttp import ClientSession -MEDIA_LENGTH_MIN = 100 +LOGGER = logging.getLogger('instabot.media_service') +MEDIA_COUNT_MIN = 100 WEBSTA_URL = 'http://websta.me/' class ScheduleError(Exception): @@ -13,21 +16,34 @@ class ScheduleError(Exception): class MediaService(object): def __init__(self, configuration): self._hashtags = configuration.hashtags + if len(self._hashtags) == 0: + raise ConfigurationError('No hashtags were specified') self._media = [] + self._session = ClientSession() @asyncio.coroutine def _get_media_by_hashtag(self, hashtag): - hashtag_url = '{0}tag/{1}'.format(WEBSTA_URL, urllib.quote(hashtag.encode('utf-8'))) - response = yield from aiohttp.get(hashtag_url) - return re.findall('span class=\"like_count_([^\"]+)\"', response.text) + url = '{}tag/{}'.format(WEBSTA_URL, urllib.parse.quote(hashtag.encode('utf-8'))) + response = yield from self._session.get(url) + response = yield from response.read() + response = response.decode('utf-8', errors='ignore') + media = re.findall('span class=\"like_count_([^\"]+)\"', response) + LOGGER.debug('{} media about \"{}\" were fetched'.format(len(media), hashtag)) + return media @asyncio.coroutine def run(self): for hashtag in itertools.cycle(self._hashtags): - while len(self._media) < MEDIA_LENGTH_MIN: - self._media.extend((yield from self._get_media_by_hashtag(hashtag))) - while len(self._media) >= MEDIA_LENGTH_MIN: - yield from asyncio.sleep(5) + if len(self._media) < MEDIA_COUNT_MIN: + try: + self._media.extend((yield from self._get_media_by_hashtag(hashtag))) + except (IOError, OSError) as e: + LOGGER.warning(e) + yield from asyncio.sleep(5) + else: + yield from asyncio.sleep(3) + else: + yield from asyncio.sleep(30) @asyncio.coroutine def pop(self): @@ -35,4 +51,5 @@ def pop(self): try: return self._media.pop(0) except IndexError: - yield from asyncio.sleep(1) + LOGGER.debug('Has no media to pop') + yield from asyncio.sleep(5) diff --git a/instabot/stats_service.py b/instabot/stats_service.py index 158f744..b1398d1 100644 --- a/instabot/stats_service.py +++ b/instabot/stats_service.py @@ -1,4 +1,7 @@ import asyncio +import logging + +LOGGER = logging.getLogger('instabot.stats_service') class Counter: def __init__(self): @@ -12,7 +15,7 @@ def increment(self, key): self._counter[key] = value + 1 def report(self, prefix): - LOGGER.info('%s %s', prefix, str(self._counter)) + LOGGER.info('{} {!s}'.format(prefix, self._counter)) class StatsService: def __init__(self): @@ -28,13 +31,13 @@ def get_instance(cls): def run(self): hour = 0 while True: - asyncio.sleep(60 * 60) + yield from asyncio.sleep(60 * 60) hour += 1 if hour % 24 == 0: - self._daily_counter.report('Daily stats #{0}'.format(hour / 24)) + self._daily_counter.report('Daily stats #{}'.format(hour / 24)) self._daily_counter.clear() else: - self._hourly_counter.report('Hourly stats #{0}'.format(hour)) + self._hourly_counter.report('Hourly stats #{}'.format(hour)) self._hourly_counter.clear() def increment(self, key): diff --git a/instabot/user.py b/instabot/user.py index 23c2044..29821f9 100644 --- a/instabot/user.py +++ b/instabot/user.py @@ -8,7 +8,7 @@ class User(Model): following_depth = IntegerField() instagram_id = CharField(max_length=20, unique=True) is_followed = BooleanField(default=False) - username = CharField(max_length=60) + username = CharField(max_length=30) was_followed_at = DateTimeField(null=True) were_followers_fetched = BooleanField(default=False) diff --git a/instabot/user_service.py b/instabot/user_service.py index 6bbcf57..bd44f1b 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -1,28 +1,31 @@ import asyncio import logging import peewee -from .errors import APIError, APILimitError, APINotAllowedError +from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, ConfigurationError from .stats_service import StatsService from .user import User LOGGER = logging.getLogger('instabot.user_service') -USERS_TO_FOLLOW_COUNT_MIN = 1000 class UserService: - def __init__(self, client): + def __init__(self, client, configuration): self._client = client self._stats_service = StatsService.get_instance() + self._users_to_follow_cache_size = configuration.users_to_follow_cache_size + if self._users_to_follow_cache_size == 0: + raise ConfigurationError('Users to follow count was set to 0.') @asyncio.coroutine def run(self): while True: - LOGGER.debug('Cycle') try: yield from self._ensure_enough_users() except APILimitError as e: - LOGGER.debug('Instagram limits were reached: %s', e) - yield from asyncio.sleep(60) - except IOError as e: + LOGGER.debug('Instagram limits were reached. {}'.format(e)) + except (APIError, APIJSONError, APINotAllowedError) as e: + LOGGER.debug(e) + yield from asyncio.sleep(5) + except (IOError, OSError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: @@ -32,7 +35,7 @@ def run(self): def _ensure_enough_users(self): users_to_follow_count = User.select().where(User.was_followed_at == None).count() LOGGER.debug('{0} users to follow found'.format(users_to_follow_count)) - if users_to_follow_count < USERS_TO_FOLLOW_COUNT_MIN: + if users_to_follow_count < self._users_to_follow_cache_size: last_users_to_follow_count = users_to_follow_count for user in User.select().where(User.were_followers_fetched == False).order_by( User.following_depth, From 7d508d84220f856b867a549f38eb905aab5d4e21 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 23 Feb 2016 19:52:33 +0300 Subject: [PATCH 14/48] UserService was fixed. ClientResponseError handling was added, minor fixes at README.md --- README.md | 12 ++++++------ instabot/following_service.py | 3 ++- instabot/like_service.py | 3 ++- instabot/media_service.py | 3 ++- instabot/user_service.py | 5 +++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4d791b2..972a9fd 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Instagram Bot +# InstaBot Instagram bot that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses followers of people you have followed. @@ -14,7 +14,7 @@ During installation process it saves people followed by you as "followed long ti Create MySQL DB: -```mysql +```sql CREATE DATABASE IF NOT EXISTS instagram CHARACTER SET utf8 COLLATE utf8_general_ci; CREATE USER instabot@localhost IDENTIFIED BY 'GT8H!b]5,9}A7'; GRANT ALL ON instagram.* TO instabot@localhost; @@ -66,10 +66,10 @@ users_to_follow_cache_size: 300 Where: -* `following_hours` -- how much users will be followed. -* `hashtags` -- list of hashtags to get photos to like. Optional. By default bot won't like anything. -* `logging` -- logging setup as described in [this howto](https://docs.python.org/3/howto/logging.html). -* `users_to_follow_cache_size` -- how much users should be fetched for following. The cache is being filled in once a minute. Optional. By default bot won't follow anybody. +* `following_hours` — how long users will stay followed. +* `hashtags` — list of hashtags to get photos to like. Optional. By default bot won't like anything. +* `logging` — logging setup as described in [this howto](https://docs.python.org/3/howto/logging.html). +* `users_to_follow_cache_size` — how much users should be fetched for following. The cache is being filled in once a minute. Optional. By default bot won't follow anybody. Create necessary DB tables: diff --git a/instabot/following_service.py b/instabot/following_service.py index 7781bcb..ef2ba76 100644 --- a/instabot/following_service.py +++ b/instabot/following_service.py @@ -4,6 +4,7 @@ from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError from .stats_service import StatsService from .user import User +from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.following_service') @@ -24,7 +25,7 @@ def run(self): except (APIError, APIJSONError) as e: LOGGER.debug(e) yield from asyncio.sleep(5) - except (IOError, OSError) as e: + except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: diff --git a/instabot/like_service.py b/instabot/like_service.py index 2b6a172..31822fc 100644 --- a/instabot/like_service.py +++ b/instabot/like_service.py @@ -2,6 +2,7 @@ import logging from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError from .stats_service import StatsService +from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.like_service') @@ -25,7 +26,7 @@ def run(self): except (APINotAllowedError, APINotFoundError) as e: LOGGER.debug('Can\'t like {}. {}'.format(media, str(e))) media = yield from self._media_service.pop() - except (IOError, OSError) as e: + except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: diff --git a/instabot/media_service.py b/instabot/media_service.py index c13f29f..f55b3fa 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -5,6 +5,7 @@ import urllib.parse from .errors import ConfigurationError from aiohttp import ClientSession +from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.media_service') MEDIA_COUNT_MIN = 100 @@ -37,7 +38,7 @@ def run(self): if len(self._media) < MEDIA_COUNT_MIN: try: self._media.extend((yield from self._get_media_by_hashtag(hashtag))) - except (IOError, OSError) as e: + except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: diff --git a/instabot/user_service.py b/instabot/user_service.py index bd44f1b..cc7ab2e 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -4,6 +4,7 @@ from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, ConfigurationError from .stats_service import StatsService from .user import User +from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.user_service') @@ -25,7 +26,7 @@ def run(self): except (APIError, APIJSONError, APINotAllowedError) as e: LOGGER.debug(e) yield from asyncio.sleep(5) - except (IOError, OSError) as e: + except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: @@ -74,7 +75,7 @@ def _ensure_enough_users(self): else: users_to_follow_count += 1 self._stats_service.increment('users_to_follow_fetched') - if users_to_follow_count >= USERS_TO_FOLLOW_COUNT_MIN: + if users_to_follow_count >= self._users_to_follow_cache_size: break LOGGER.debug( '%d users fetched.', From 5bdb5bd0b839dc50591cc037e13d009b271beab9 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 04:36:58 +0300 Subject: [PATCH 15/48] Minor fixes at README.md and UserService --- README.md | 4 ++-- instabot/user_service.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 972a9fd..fcfe540 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,8 @@ Where: Create necessary DB tables: - $ ./instabot_runner.py install configuration.yml + (instabotenv) $ ./instabot_runner.py install configuration.yml Run: - $ ./instabot_runner.py configuration.yml + (instabotenv) $ ./instabot_runner.py configuration.yml diff --git a/instabot/user_service.py b/instabot/user_service.py index cc7ab2e..482587b 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -35,7 +35,7 @@ def run(self): @asyncio.coroutine def _ensure_enough_users(self): users_to_follow_count = User.select().where(User.was_followed_at == None).count() - LOGGER.debug('{0} users to follow found'.format(users_to_follow_count)) + LOGGER.debug('{} users to follow found'.format(users_to_follow_count)) if users_to_follow_count < self._users_to_follow_cache_size: last_users_to_follow_count = users_to_follow_count for user in User.select().where(User.were_followers_fetched == False).order_by( @@ -47,7 +47,7 @@ def _ensure_enough_users(self): followers_json = yield from self._client.get_some_followers(user) except APINotAllowedError as e: LOGGER.debug( - 'Can\'t fetch followers of {0}: {1}'.format( + 'Can\'t fetch followers of {}. {}'.format( user.username, e, ), @@ -58,7 +58,7 @@ def _ensure_enough_users(self): user.were_followers_fetched = True user.save() LOGGER.debug( - '{0} followers of {1} were fetched'.format( + '{} followers of {} were fetched'.format( len(followers_json), user.username, ), @@ -78,6 +78,6 @@ def _ensure_enough_users(self): if users_to_follow_count >= self._users_to_follow_cache_size: break LOGGER.debug( - '%d users fetched.', + '%d users saved in DB', users_to_follow_count - last_users_to_follow_count, ) From e278a1b58bc2787396e367d91ea29be9fd5a0267 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 05:27:39 +0300 Subject: [PATCH 16/48] PyMySQL was added to requirements. --- requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9845c51..ef10701 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ asyncio>=3.4.3,<4.0 docopt>=0.6.2,<0.7 -PyYAML>=3.11 -peewee>=2.7.4 +PyYAML>=3.11,<4.0 +peewee>=2.7.4,<3.0 +pymysql>=0.6.7,<0.7 From c5ec1c9ad7285bb988ac2fb294f0a813d068a42d Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 06:28:33 +0300 Subject: [PATCH 17/48] Log files were added to .gitignore --- .gitignore | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 793ccdd..ae4542d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ *.pyc -state.yml -configuration.yml -hashtags.txt + +# Configuration +conf*.yml + +# Logging *.log +log.log.* From 50f5d544b3543ab29290080d922a13a5df2aa78f Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 06:31:57 +0300 Subject: [PATCH 18/48] Logging into file was described at README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index fcfe540..516e3f3 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ During installation process it saves people followed by you as "followed long ti $ cd instabotenv $ source bin/activate (instabotenv) $ git clone https://github.com/quasiyoke/InstaBot.git + (instabotenv) $ cd InstaBot (instabotenv) $ pip install -r requirements.txt Create MySQL DB: @@ -54,6 +55,14 @@ logging: class: logging.StreamHandler level: DEBUG formatter: simple + file: + class: logging.handlers.RotatingFileHandler + level: DEBUG + formatter: simple + filename: log.log + maxBytes: 10485760 + backupCount: 10 + encoding: utf-8 loggers: instabot: level: DEBUG From 4d7876f5b076fb52596d75479f468059429fa905 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 06:33:26 +0300 Subject: [PATCH 19/48] Delays after unsuccessful requests were added. --- instabot/instagram.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/instabot/instagram.py b/instabot/instagram.py index 0ebbbd3..c4bcf9c 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -63,6 +63,8 @@ def _ajax(self, url, data=None, referer=None): headers=headers, ) if response.status == 404: + yield from response.close() + yield from self._sleep_success() raise APINotFoundError('404 for {}'.format(url)) text = yield from response.text() try: @@ -80,10 +82,13 @@ def _ajax(self, url, data=None, referer=None): response=response, ), if response.status == 200: + yield from self._sleep_success() raise APIError(message) elif response.status == 400: + yield from self._sleep_success() raise APINotAllowedError(message) else: + yield from self._sleep_success() raise APIError(message) if response_json.get('status') != 'ok': raise APIError('AJAX request to {} is not OK: {}'.format(url, response_json)) @@ -125,10 +130,10 @@ def _check_api_response(self, response): code = response['meta']['code'] if code == 200: return - message = '{0} ({1}): {2}'.format( - code, - response['meta']['error_type'], - response['meta']['error_message'], + message = '{code} ({type}): {message}'.format( + code=code, + type=response['meta']['error_type'], + message=response['meta']['error_message'], ) if code == 400: raise APINotAllowedError(message) @@ -196,7 +201,7 @@ def get_followed(self, user): response = yield from self._api(url=next_url) followed.extend(response['data']) next_url = response['pagination'].get('next_url') - LOGGER.debug('%d followed users were fetched.', len(followed)) + LOGGER.debug('{} followed users were fetched'.format(len(followed))) return followed @asyncio.coroutine From 7e410b404713c758a64aa1063fa204dd63f1f1be Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 06:38:15 +0300 Subject: [PATCH 20/48] Bumped version number to 0.2.1. --- instabot/instabot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instabot/instabot.py b/instabot/instabot.py index a474a06..37fa849 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -28,7 +28,7 @@ CONFIGURATION Path to configuration.yml file. ''' LOGGER = logging.getLogger('instabot') -__version__ = '0.2' +__version__ = '0.2.1' def install(configuration, db): db.create_tables([User]) From 7cb22207f65798ae864123691f0805daf7ff94be Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 07:13:45 +0300 Subject: [PATCH 21/48] Amount of logging was reduced in case of 5XX errors. --- instabot/instagram.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/instabot/instagram.py b/instabot/instagram.py index c4bcf9c..cd35a68 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -65,7 +65,11 @@ def _ajax(self, url, data=None, referer=None): if response.status == 404: yield from response.close() yield from self._sleep_success() - raise APINotFoundError('404 for {}'.format(url)) + raise APINotFoundError('AJAX response status code is 404 for {}'.format(url)) + elif 500 <= response.status < 600: + yield from response.close() + yield from self._sleep_success() + raise APIError(response.status) text = yield from response.text() try: response_json = json.loads(text) From 979513fdf44b84d0fe95018d73f43d5895430972 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Wed, 24 Feb 2016 16:57:19 +0300 Subject: [PATCH 22/48] Closing connection in case of bad response was fixed. --- instabot/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/instabot/instagram.py b/instabot/instagram.py index cd35a68..09431c8 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -63,11 +63,11 @@ def _ajax(self, url, data=None, referer=None): headers=headers, ) if response.status == 404: - yield from response.close() + response.close() yield from self._sleep_success() raise APINotFoundError('AJAX response status code is 404 for {}'.format(url)) elif 500 <= response.status < 600: - yield from response.close() + response.close() yield from self._sleep_success() raise APIError(response.status) text = yield from response.text() From aa2f57d2ceb363ad99094cfc7f99d600a4304ba1 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 31 May 2016 10:05:33 +0300 Subject: [PATCH 23/48] Virtualenv was added to .gitignore. --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index ae4542d..a598d8b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,10 @@ conf*.yml # Logging *.log log.log.* + +#virtualenv +/bin/ +/include/ +/lib/ +/pip-selfcheck.json +/share/ From 8058c6d15fc22c7e7dd785e69e2a2a51c3439111 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 31 May 2016 11:17:33 +0300 Subject: [PATCH 24/48] Websta.me was replaced by tags exploration inside Instagram --- instabot/configuration.py | 2 +- instabot/errors.py | 3 +++ instabot/instabot.py | 2 +- instabot/instagram.py | 3 +-- instabot/media_service.py | 25 ++++++++++++++++--------- instabot/stats_service.py | 2 +- requirements.txt | 1 + 7 files changed, 24 insertions(+), 14 deletions(-) diff --git a/instabot/configuration.py b/instabot/configuration.py index 5729c5d..1ac758f 100644 --- a/instabot/configuration.py +++ b/instabot/configuration.py @@ -2,7 +2,7 @@ import sys import yaml -LOGGER = logging.getLogger('instabot') +LOGGER = logging.getLogger('instabot.configuration') class Configuration: def __init__(self, filename): diff --git a/instabot/errors.py b/instabot/errors.py index a983fe8..7a3a315 100644 --- a/instabot/errors.py +++ b/instabot/errors.py @@ -19,3 +19,6 @@ class ConfigurationError(Exception): class DBError(Exception): pass + +class MediaError(Exception): + pass diff --git a/instabot/instabot.py b/instabot/instabot.py index 37fa849..a4ecb2d 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -97,5 +97,5 @@ def run(configuration): like_client = instagram.Client(configuration) like_service = LikeService(like_client, media_service) loop.create_task(like_service.run()) - + loop.run_forever() diff --git a/instabot/instagram.py b/instabot/instagram.py index 09431c8..79212bd 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -1,13 +1,12 @@ import asyncio import logging import json -import re from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError from aiohttp import ClientSession BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot.instagram') -USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:44.0) Gecko/20100101 Firefox/44.0' +USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0' class Client(object): def __init__(self, configuration): diff --git a/instabot/media_service.py b/instabot/media_service.py index f55b3fa..17d805d 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -1,20 +1,17 @@ import asyncio import itertools +import json import logging import re import urllib.parse -from .errors import ConfigurationError +from .errors import ConfigurationError, MediaError from aiohttp import ClientSession from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.media_service') MEDIA_COUNT_MIN = 100 -WEBSTA_URL = 'http://websta.me/' -class ScheduleError(Exception): - pass - -class MediaService(object): +class MediaService: def __init__(self, configuration): self._hashtags = configuration.hashtags if len(self._hashtags) == 0: @@ -24,11 +21,21 @@ def __init__(self, configuration): @asyncio.coroutine def _get_media_by_hashtag(self, hashtag): - url = '{}tag/{}'.format(WEBSTA_URL, urllib.parse.quote(hashtag.encode('utf-8'))) + url = 'https://www.instagram.com/explore/tags/{}/'.format( + urllib.parse.quote(hashtag.encode('utf-8')), + ) response = yield from self._session.get(url) response = yield from response.read() response = response.decode('utf-8', errors='ignore') - media = re.findall('span class=\"like_count_([^\"]+)\"', response) + match = re.search( + r'', + response, + ) + if match is None: + raise MediaError() + response = json.loads(match.group(1)) + media = response['entry_data']['TagPage'][0]['tag']['media']['nodes'] + media = [m['id'] for m in media] LOGGER.debug('{} media about \"{}\" were fetched'.format(len(media), hashtag)) return media @@ -38,7 +45,7 @@ def run(self): if len(self._media) < MEDIA_COUNT_MIN: try: self._media.extend((yield from self._get_media_by_hashtag(hashtag))) - except (IOError, OSError, ClientResponseError) as e: + except (IOError, OSError, ClientResponseError, MediaError) as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: diff --git a/instabot/stats_service.py b/instabot/stats_service.py index b1398d1..11fa307 100644 --- a/instabot/stats_service.py +++ b/instabot/stats_service.py @@ -34,7 +34,7 @@ def run(self): yield from asyncio.sleep(60 * 60) hour += 1 if hour % 24 == 0: - self._daily_counter.report('Daily stats #{}'.format(hour / 24)) + self._daily_counter.report('Daily stats #{:.0f}'.format(hour / 24)) self._daily_counter.clear() else: self._hourly_counter.report('Hourly stats #{}'.format(hour)) diff --git a/requirements.txt b/requirements.txt index ef10701..1e9533e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ asyncio>=3.4.3,<4.0 +aiohttp>=0.21.6,<0.22 docopt>=0.6.2,<0.7 PyYAML>=3.11,<4.0 peewee>=2.7.4,<3.0 From 54fdc58cc23047cdce072f4715893c16669627ed Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Tue, 31 May 2016 11:33:30 +0300 Subject: [PATCH 25/48] Bumped version number to 0.2.2. --- instabot/instabot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instabot/instabot.py b/instabot/instabot.py index a4ecb2d..4d6ebee 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -28,7 +28,7 @@ CONFIGURATION Path to configuration.yml file. ''' LOGGER = logging.getLogger('instabot') -__version__ = '0.2.1' +__version__ = '0.2.2' def install(configuration, db): db.create_tables([User]) From 96284535bc8e2822c1934eec49094fb99f62037c Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Sat, 27 Aug 2016 12:51:36 +0300 Subject: [PATCH 26/48] PEP8 was applied to the sources. --- instabot/configuration.py | 17 ++++-- instabot/db.py | 5 +- instabot/errors.py | 7 +++ instabot/following_service.py | 29 +++++---- instabot/instabot.py | 13 +++- instabot/instagram.py | 85 ++++++++++++++++++-------- instabot/like_service.py | 4 +- instabot/media_service.py | 15 +++-- instabot/stats_service.py | 6 +- instabot/user.py | 1 + instabot/user_service.py | 29 ++++++--- instabot/views.py | 108 ---------------------------------- 12 files changed, 151 insertions(+), 168 deletions(-) delete mode 100644 instabot/views.py diff --git a/instabot/configuration.py b/instabot/configuration.py index 1ac758f..2cc729d 100644 --- a/instabot/configuration.py +++ b/instabot/configuration.py @@ -4,6 +4,7 @@ LOGGER = logging.getLogger('instabot.configuration') + class Configuration: def __init__(self, filename): try: @@ -17,7 +18,8 @@ def __init__(self, filename): self.db_user = configuration['db']['user'] self.db_password = configuration['db']['password'] self.following_hours = configuration['following_hours'] - self.instagram_client_id = configuration['credentials']['client_id'] + self.instagram_client_id = \ + configuration['credentials']['client_id'] self.instagram_limit_sleep_time_coefficient = \ configuration['instagram']['limit_sleep_time_coefficient'] self.instagram_limit_sleep_time_min = \ @@ -32,11 +34,18 @@ def __init__(self, filename): self.instagram_password = configuration['credentials']['password'] self.logging = configuration['logging'] except (KeyError, TypeError) as e: - sys.exit('Configuration is not fully specified. {} is missed.'.format(e)) + sys.exit( + 'Configuration is not fully specified. {} is missed.' + .format(e), + ) self.hashtags = configuration.get('hashtags', []) - self.users_to_follow_cache_size = configuration.get('users_to_follow_cache_size', 0) + self.users_to_follow_cache_size = configuration.get( + 'users_to_follow_cache_size', + 0, + ) try: self.following_hours = int(self.following_hours) - self.users_to_follow_cache_size = int(self.users_to_follow_cache_size) + self.users_to_follow_cache_size = \ + int(self.users_to_follow_cache_size) except ValueError as e: sys.exit('Some integer value is specified wrong: {}'.format(e)) diff --git a/instabot/db.py b/instabot/db.py index 705ddf2..1adfca3 100644 --- a/instabot/db.py +++ b/instabot/db.py @@ -7,13 +7,16 @@ LOGGER = logging.getLogger('instabot') + class RetryingMySQLDatabase(RetryOperationalError, MySQLDatabase): ''' Automatically reconnecting database class. - @see http://docs.peewee-orm.com/en/latest/peewee/database.html#automatic-reconnect + @see {@link + http://docs.peewee-orm.com/en/latest/peewee/database.html#automatic-reconnect} ''' pass + def get_db(configuration): ''' @raise DBError diff --git a/instabot/errors.py b/instabot/errors.py index 7a3a315..2aaffb6 100644 --- a/instabot/errors.py +++ b/instabot/errors.py @@ -2,23 +2,30 @@ class APIError(Exception): def __init__(self, code): super(APIError, self).__init__(str(code)) + class APIJSONError(Exception): pass + class APILimitError(Exception): pass + class APINotAllowedError(Exception): pass + class APINotFoundError(Exception): pass + class ConfigurationError(Exception): pass + class DBError(Exception): pass + class MediaError(Exception): pass diff --git a/instabot/following_service.py b/instabot/following_service.py index ef2ba76..160a0d6 100644 --- a/instabot/following_service.py +++ b/instabot/following_service.py @@ -1,17 +1,20 @@ import asyncio import datetime import logging -from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError +from .errors import APIError, APIJSONError, APILimitError, \ + APINotAllowedError, APINotFoundError from .stats_service import StatsService from .user import User from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.following_service') + class FollowingService: def __init__(self, client, configuration): self._client = client - self._following_timedelta = datetime.timedelta(hours=configuration.following_hours) + self._following_timedelta = \ + datetime.timedelta(hours=configuration.following_hours) self._stats_service = StatsService.get_instance() @asyncio.coroutine @@ -38,16 +41,18 @@ def _follow(self): @raise APIJSONError @raise APILimitError ''' - unfollowing_threshold = datetime.datetime.utcnow() - self._following_timedelta - for user in User.select().where(User.was_followed_at == None).order_by( - User.following_depth, - User.created, - ): + unfollowing_threshold = datetime.datetime.utcnow() - \ + self._following_timedelta + for user in User.select().where( + User.was_followed_at == None, + ).order_by(User.following_depth, User.created): try: yield from self._client.follow(user) except (APINotAllowedError, APINotFoundError) as e: LOGGER.debug('Can\'t follow {}. {}'.format(user.username, e)) - user.is_followed = False # Make user look like he was followed and was unfollowed already. + # Make user look like he was followed and was unfollowed + # already. + user.is_followed = False user.was_followed_at = unfollowing_threshold else: user.is_followed = True @@ -62,10 +67,12 @@ def _unfollow(self): @raise APIJSONError @raise APILimitError ''' - unfollowing_threshold = datetime.datetime.utcnow() - self._following_timedelta + unfollowing_threshold = datetime.datetime.utcnow() - \ + self._following_timedelta for user in User.select().where( - (User.is_followed == True) & (User.was_followed_at <= unfollowing_threshold), - ): + (User.is_followed == True) & + (User.was_followed_at <= unfollowing_threshold), + ): try: yield from self._client.unfollow(user) except (APINotAllowedError, APINotFoundError) as e: diff --git a/instabot/instabot.py b/instabot/instabot.py index 4d6ebee..34f1b90 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -30,16 +30,19 @@ LOGGER = logging.getLogger('instabot') __version__ = '0.2.2' + def install(configuration, db): db.create_tables([User]) client = instagram.Client(configuration) now = datetime.datetime.utcnow() - was_followed_at = now - datetime.timedelta(hours=configuration.following_hours) + was_followed_at = now - \ + datetime.timedelta(hours=configuration.following_hours) user = User.create( following_depth=0, instagram_id=client.id, username=configuration.instagram_username, - was_followed_at=was_followed_at, # To prevent attempts to follow user by himself. + # To prevent attempts to follow user by himself. + was_followed_at=was_followed_at, ) loop = asyncio.get_event_loop() followed_users_json = loop.run_until_complete(client.get_followed(user)) @@ -51,7 +54,10 @@ def install(configuration, db): username=followed_json['username'], was_followed_at=was_followed_at, ) - LOGGER.info('{0} followed users were saved in DB'.format(len(followed_users_json))) + LOGGER.info( + '{0} followed users were saved in DB'.format(len(followed_users_json)), + ) + def main(): arguments = docopt(DOC, version=__version__) @@ -70,6 +76,7 @@ def main(): LOGGER.info('Executing InstaBot') run(configuration) + def run(configuration): loop = asyncio.get_event_loop() diff --git a/instabot/instagram.py b/instabot/instagram.py index 79212bd..f66791c 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -1,21 +1,29 @@ import asyncio import logging import json -from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError +from .errors import APIError, APIJSONError, APILimitError, \ + APINotAllowedError, APINotFoundError from aiohttp import ClientSession BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot.instagram') -USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0' +USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) ' \ + 'Gecko/20100101 Firefox/46.0' + class Client(object): def __init__(self, configuration): self._client_id = configuration.instagram_client_id - self._limit_sleep_time_coefficient = configuration.instagram_limit_sleep_time_coefficient - self._limit_sleep_time_min = configuration.instagram_limit_sleep_time_min - self._success_sleep_time_coefficient = configuration.instagram_success_sleep_time_coefficient - self._success_sleep_time_max = configuration.instagram_success_sleep_time_max - self._success_sleep_time_min = configuration.instagram_success_sleep_time_min + self._limit_sleep_time_coefficient = configuration + .instagram_limit_sleep_time_coefficient + self._limit_sleep_time_min = configuration + .instagram_limit_sleep_time_min + self._success_sleep_time_coefficient = configuration + .instagram_success_sleep_time_coefficient + self._success_sleep_time_max = configuration + .instagram_success_sleep_time_max + self._success_sleep_time_min = configuration + .instagram_success_sleep_time_min self._limit_sleep_time = self._limit_sleep_time_min self._success_sleep_time = self._success_sleep_time_max self._username = configuration.instagram_username @@ -64,7 +72,9 @@ def _ajax(self, url, data=None, referer=None): if response.status == 404: response.close() yield from self._sleep_success() - raise APINotFoundError('AJAX response status code is 404 for {}'.format(url)) + raise APINotFoundError( + 'AJAX response status code is 404 for {}'.format(url), + ) elif 500 <= response.status < 600: response.close() yield from self._sleep_success() @@ -75,15 +85,17 @@ def _ajax(self, url, data=None, referer=None): except ValueError as e: if 'too many requests' in text or 'temporarily blocked' in text: yield from self._sleep_limit() - raise APILimitError('Too many AJAX requests. URL: {}'.format(url)) + raise APILimitError( + 'Too many AJAX requests. URL: {}'.format(url), + ) message = 'AJAX request to {url} is not JSON: {error} ' \ 'Response ({status}): \"{text}\"'.format( - url=url, - error=e, - status=response.status, - text=text, - response=response, - ), + url=url, + error=e, + status=response.status, + text=text, + response=response, + ), if response.status == 200: yield from self._sleep_success() raise APIError(message) @@ -94,7 +106,9 @@ def _ajax(self, url, data=None, referer=None): yield from self._sleep_success() raise APIError(message) if response_json.get('status') != 'ok': - raise APIError('AJAX request to {} is not OK: {}'.format(url, response_json)) + raise APIError( + 'AJAX request to {} is not OK: {}'.format(url, response_json), + ) yield from self._sleep_success() return response_json @@ -118,7 +132,10 @@ def _api(self, path=None, url=None): try: response = json.loads(response) except ValueError as e: - raise APIError('Bad response for {}: {} Response: {}'.format(url, e, response)) + raise APIError( + 'Bad response for {}: {} Response: {}' + .format(url, e, response), + ) yield from self._check_api_response(response) yield from self._sleep_success() return response @@ -182,10 +199,14 @@ def follow(self, user): ) except APILimitError as e: raise APILimitError( - 'API limit was reached during following {}. {}'.format(user.username, e), + 'API limit was reached during following {}. {}' + .format(user.username, e), ) except APIError as e: - raise APIError('API troubles during following {}. {}'.format(user.username, e)) + raise APIError( + 'API troubles during following {}. {}' + .format(user.username, e), + ) else: LOGGER.debug('{} was followed'.format(user.username)) @@ -197,7 +218,9 @@ def get_followed(self, user): @raise APINotAllowedError @raise APIError ''' - response = yield from self._api('users/{}/follows'.format(user.instagram_id)) + response = yield from self._api( + 'users/{}/follows'.format(user.instagram_id), + ) followed = response['data'] next_url = response['pagination'].get('next_url') while next_url: @@ -215,7 +238,9 @@ def get_some_followers(self, user): @raise APINotAllowedError @raise APIError ''' - response = yield from self._api('users/{}/followed-by'.format(user.instagram_id)) + response = yield from self._api( + 'users/{}/followed-by'.format(user.instagram_id), + ) followers = response['data'] return followers @@ -255,7 +280,10 @@ def relogin(self): @asyncio.coroutine def _sleep_limit(self): - LOGGER.debug('Sleeping for {:.0f} sec because of API limits'.format(self._limit_sleep_time)) + LOGGER.debug( + 'Sleeping for {:.0f} sec because of API limits' + .format(self._limit_sleep_time), + ) yield from asyncio.sleep(self._limit_sleep_time) self._limit_sleep_time *= self._limit_sleep_time_coefficient @@ -265,8 +293,9 @@ def _sleep_success(self): self._limit_sleep_time = self._limit_sleep_time_min self._success_sleep_time = self._success_sleep_time_max yield from asyncio.sleep(self._success_sleep_time) - self._success_sleep_time = self._success_sleep_time_min + (self._success_sleep_time - \ - self._success_sleep_time_min) * self._success_sleep_time_coefficient + self._success_sleep_time = self._success_sleep_time_min + \ + (self._success_sleep_time - self._success_sleep_time_min) * \ + self._success_sleep_time_coefficient @asyncio.coroutine def unfollow(self, user): @@ -284,10 +313,14 @@ def unfollow(self, user): ) except APILimitError as e: raise APILimitError( - 'API limit was reached during unfollowing {}. {}'.format(user.username, e), + 'API limit was reached during unfollowing {}. {}' + .format(user.username, e), ) except APIError as e: - raise APIError('API troubles during unfollowing {}. {}'.format(user.username, e)) + raise APIError( + 'API troubles during unfollowing {}. {}' + .format(user.username, e), + ) else: LOGGER.debug('{} was unfollowed'.format(user.username)) diff --git a/instabot/like_service.py b/instabot/like_service.py index 31822fc..652473b 100644 --- a/instabot/like_service.py +++ b/instabot/like_service.py @@ -1,11 +1,13 @@ import asyncio import logging -from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, APINotFoundError +from .errors import APIError, APIJSONError, APILimitError, \ + APINotAllowedError, APINotFoundError from .stats_service import StatsService from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.like_service') + class LikeService: def __init__(self, client, media_service): self._client = client diff --git a/instabot/media_service.py b/instabot/media_service.py index 17d805d..e7a04d4 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -11,6 +11,7 @@ LOGGER = logging.getLogger('instabot.media_service') MEDIA_COUNT_MIN = 100 + class MediaService: def __init__(self, configuration): self._hashtags = configuration.hashtags @@ -28,7 +29,8 @@ def _get_media_by_hashtag(self, hashtag): response = yield from response.read() response = response.decode('utf-8', errors='ignore') match = re.search( - r'', + r'', response, ) if match is None: @@ -36,7 +38,9 @@ def _get_media_by_hashtag(self, hashtag): response = json.loads(match.group(1)) media = response['entry_data']['TagPage'][0]['tag']['media']['nodes'] media = [m['id'] for m in media] - LOGGER.debug('{} media about \"{}\" were fetched'.format(len(media), hashtag)) + LOGGER.debug( + '{} media about \"{}\" were fetched'.format(len(media), hashtag), + ) return media @asyncio.coroutine @@ -44,8 +48,11 @@ def run(self): for hashtag in itertools.cycle(self._hashtags): if len(self._media) < MEDIA_COUNT_MIN: try: - self._media.extend((yield from self._get_media_by_hashtag(hashtag))) - except (IOError, OSError, ClientResponseError, MediaError) as e: + self._media.extend( + (yield from self._get_media_by_hashtag(hashtag)), + ) + except (IOError, OSError, ClientResponseError, MediaError) \ + as e: LOGGER.warning(e) yield from asyncio.sleep(5) else: diff --git a/instabot/stats_service.py b/instabot/stats_service.py index 11fa307..33ce910 100644 --- a/instabot/stats_service.py +++ b/instabot/stats_service.py @@ -3,6 +3,7 @@ LOGGER = logging.getLogger('instabot.stats_service') + class Counter: def __init__(self): self._counter = {} @@ -17,6 +18,7 @@ def increment(self, key): def report(self, prefix): LOGGER.info('{} {!s}'.format(prefix, self._counter)) + class StatsService: def __init__(self): self._hourly_counter = Counter() @@ -34,7 +36,9 @@ def run(self): yield from asyncio.sleep(60 * 60) hour += 1 if hour % 24 == 0: - self._daily_counter.report('Daily stats #{:.0f}'.format(hour / 24)) + self._daily_counter.report( + 'Daily stats #{:.0f}'.format(hour / 24), + ) self._daily_counter.clear() else: self._hourly_counter.report('Hourly stats #{}'.format(hour)) diff --git a/instabot/user.py b/instabot/user.py index 29821f9..d615986 100644 --- a/instabot/user.py +++ b/instabot/user.py @@ -3,6 +3,7 @@ database_proxy = Proxy() + class User(Model): created = DateTimeField(default=datetime.datetime.utcnow) following_depth = IntegerField() diff --git a/instabot/user_service.py b/instabot/user_service.py index 482587b..55b5f5c 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -1,18 +1,21 @@ import asyncio import logging import peewee -from .errors import APIError, APIJSONError, APILimitError, APINotAllowedError, ConfigurationError +from .errors import APIError, APIJSONError, APILimitError, \ + APINotAllowedError, ConfigurationError from .stats_service import StatsService from .user import User from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.user_service') + class UserService: def __init__(self, client, configuration): self._client = client self._stats_service = StatsService.get_instance() - self._users_to_follow_cache_size = configuration.users_to_follow_cache_size + self._users_to_follow_cache_size = configuration + .users_to_follow_cache_size if self._users_to_follow_cache_size == 0: raise ConfigurationError('Users to follow count was set to 0.') @@ -34,17 +37,24 @@ def run(self): @asyncio.coroutine def _ensure_enough_users(self): - users_to_follow_count = User.select().where(User.was_followed_at == None).count() + users_to_follow_count = User + .select() + .where(User.was_followed_at == None) + .count() LOGGER.debug('{} users to follow found'.format(users_to_follow_count)) if users_to_follow_count < self._users_to_follow_cache_size: last_users_to_follow_count = users_to_follow_count - for user in User.select().where(User.were_followers_fetched == False).order_by( - User.following_depth, - User.created, - ): + for user in User + .select() + .where(User.were_followers_fetched == False) + .order_by( + User.following_depth, + User.created, + ): following_depth = user.following_depth + 1 try: - followers_json = yield from self._client.get_some_followers(user) + followers_json = \ + yield from self._client.get_some_followers(user) except APINotAllowedError as e: LOGGER.debug( 'Can\'t fetch followers of {}. {}'.format( @@ -74,7 +84,8 @@ def _ensure_enough_users(self): pass else: users_to_follow_count += 1 - self._stats_service.increment('users_to_follow_fetched') + self._stats_service + .increment('users_to_follow_fetched') if users_to_follow_count >= self._users_to_follow_cache_size: break LOGGER.debug( diff --git a/instabot/views.py b/instabot/views.py deleted file mode 100644 index 85cee33..0000000 --- a/instabot/views.py +++ /dev/null @@ -1,108 +0,0 @@ -import json -from django import http -from cms import sitemaps -from django.core import mail -from django.conf import settings -from django.views.generic import edit as edit_views -from cmsplugin_dog import models as dog_models -import models -import forms - - -def _update_user_subscriptions(user, subscribe): - import mailchimp - from mailchimp import chimp - user.subscribed = subscribe - mailchimp_list = mailchimp.utils.get_connection().get_list_by_id(settings.MAILCHIMP_LIST_ID) - if subscribe: - try: - mailchimp_list.subscribe(user.auth_user.email, { 'EMAIL': user.auth_user.email, 'FNAME': user.auth_user.first_name, }) - except chimp.ChimpyException: - pass - else: - try: - mailchimp_list.unsubscribe(user.auth_user.email) - except chimp.ChimpyException: - pass - - -class Claim(edit_views.FormView): - form_class = forms.ClaimForm - template_name = 'claim.html' - - def get_context_data(self, *args, **kwargs): - context = super(Claim, self).get_context_data(*args, **kwargs) - context['dog'] = getattr(self, 'dog', None) - return context - - def get_initial(self): - initial = super(Claim, self).get_initial() - initial.update(forms.ClaimForm.get_initial(self.request.user)) - return initial - - def get_success_url(self): - return self.dog.get_url() - - def post(self, request, *args, **kwargs): - form_class = self.get_form_class() - form = self.get_form(form_class) - self.dog = self.get_dog(form) - if form.is_valid(): - return self.form_valid(form) - else: - return self.form_invalid(form) - - def get_dog(self, form): - try: - return dog_models.Dog.objects.get(pk=form.data['dog']) - except dog_models.Dog.DoesNotExist: - raise http.Http404() - - def form_valid(self, form): - auth_user = self.request.user - if auth_user.is_authenticated() and auth_user.email == form.cleaned_data['email'].lower(): - try: - user = auth_user.kapustkinpitomnik_user - except models.User.DoesNotExist: - user = None - else: - auth_user = auth.authenticate(form.cleaned_data['email'], settings.ANY_PASSWORD) - if auth_user: - try: - user = auth_user.kapustkinpitomnik_user - except models.User.DoesNotExist: - user = None - else: - auth_user = auth.create_user(form.cleaned_data['email'].lower(), settings.ANY_PASSWORD) - user = None - auth.login(self.request, auth_user) - auth_user.first_name = form.cleaned_data['name'] - auth_user.save() - if user is None: - user = models.User( - auth_user=auth_user, - ) - user.phone = form.cleaned_data['phone'] - _update_user_subscriptions(user, form.cleaned_data['subscribe']) - user.save() - subject_dict = { - 'name': auth_user.first_name, - 'dog': self.dog.get_name(), - } - if 'for_breeding' == self.dog.status: - subject_dict['action'] = 'breed with' - elif 'fertile' == self.dog.status: - subject_dict['action'] = 'take a puppy from' - else: - subject_dict['action'] = 'buy' - mail.mail_managers( - '%(name)s wants to %(action)s %(dog)s' % subject_dict, - 'Email: %s\nDog\'s page: http://kapustkapust.ru%s\nPhone: %s' % (auth_user.email, self.dog.get_url(), user.phone), - ) - return self.render_to_response(self.get_context_data(success=True)) - - -class Sitemap(sitemaps.CMSSitemap): - def items(self): - pages = super(Sitemap, self).items() - return pages.exclude(reverse_id='common') From 202762f1a438bc5c2cabc46f2c1a75c2afd4f1e7 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Sat, 27 Aug 2016 16:07:58 +0300 Subject: [PATCH 27/48] Python 3.5: `@asyncio.coroutine` -> `async def`, `yield from` -> `await`. --- README.md | 2 +- instabot/following_service.py | 23 ++++----- instabot/instagram.py | 95 +++++++++++++++-------------------- instabot/like_service.py | 15 +++--- instabot/media_service.py | 23 ++++----- instabot/stats_service.py | 5 +- instabot/user_service.py | 38 +++++++------- 7 files changed, 88 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index 516e3f3..645b8d7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # InstaBot -Instagram bot that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses followers of people you have followed. +Instagram bot written in Python 3.5 that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses list of followers of people you have followed. During installation process it saves people followed by you as "followed long time ago" and unfollows them at the first start. diff --git a/instabot/following_service.py b/instabot/following_service.py index 160a0d6..4b6fcd5 100644 --- a/instabot/following_service.py +++ b/instabot/following_service.py @@ -17,25 +17,23 @@ def __init__(self, client, configuration): datetime.timedelta(hours=configuration.following_hours) self._stats_service = StatsService.get_instance() - @asyncio.coroutine - def run(self): + async def run(self): while True: try: - yield from self._unfollow() - yield from self._follow() + await self._unfollow() + await self._follow() except APILimitError as e: LOGGER.debug(e) except (APIError, APIJSONError) as e: LOGGER.debug(e) - yield from asyncio.sleep(5) + await asyncio.sleep(5) except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) - yield from asyncio.sleep(5) + await asyncio.sleep(5) else: - yield from asyncio.sleep(10) + await asyncio.sleep(10) - @asyncio.coroutine - def _follow(self): + async def _follow(self): ''' @raise APIError @raise APIJSONError @@ -47,7 +45,7 @@ def _follow(self): User.was_followed_at == None, ).order_by(User.following_depth, User.created): try: - yield from self._client.follow(user) + await self._client.follow(user) except (APINotAllowedError, APINotFoundError) as e: LOGGER.debug('Can\'t follow {}. {}'.format(user.username, e)) # Make user look like he was followed and was unfollowed @@ -60,8 +58,7 @@ def _follow(self): self._stats_service.increment('followed') user.save() - @asyncio.coroutine - def _unfollow(self): + async def _unfollow(self): ''' @raise APIError @raise APIJSONError @@ -74,7 +71,7 @@ def _unfollow(self): (User.was_followed_at <= unfollowing_threshold), ): try: - yield from self._client.unfollow(user) + await self._client.unfollow(user) except (APINotAllowedError, APINotFoundError) as e: LOGGER.debug('Can\'t unfollow {}. {}'.format(user.username, e)) else: diff --git a/instabot/instagram.py b/instabot/instagram.py index f66791c..a971bbf 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -48,8 +48,7 @@ def __init__(self, configuration): loop = asyncio.get_event_loop() loop.run_until_complete(self._do_login()) - @asyncio.coroutine - def _ajax(self, url, data=None, referer=None): + async def _ajax(self, url, data=None, referer=None): ''' @raise APIError @raise APIJSONError @@ -64,27 +63,27 @@ def _ajax(self, url, data=None, referer=None): 'X-CSRFToken': self._csrf_token, } url = BASE_URL + url - response = yield from self._session.post( + response = await self._session.post( url, data=data, headers=headers, ) if response.status == 404: response.close() - yield from self._sleep_success() + await self._sleep_success() raise APINotFoundError( 'AJAX response status code is 404 for {}'.format(url), ) elif 500 <= response.status < 600: response.close() - yield from self._sleep_success() + await self._sleep_success() raise APIError(response.status) - text = yield from response.text() + text = await response.text() try: response_json = json.loads(text) except ValueError as e: if 'too many requests' in text or 'temporarily blocked' in text: - yield from self._sleep_limit() + await self._sleep_limit() raise APILimitError( 'Too many AJAX requests. URL: {}'.format(url), ) @@ -97,23 +96,22 @@ def _ajax(self, url, data=None, referer=None): response=response, ), if response.status == 200: - yield from self._sleep_success() + await self._sleep_success() raise APIError(message) elif response.status == 400: - yield from self._sleep_success() + await self._sleep_success() raise APINotAllowedError(message) else: - yield from self._sleep_success() + await self._sleep_success() raise APIError(message) if response_json.get('status') != 'ok': raise APIError( 'AJAX request to {} is not OK: {}'.format(url, response_json), ) - yield from self._sleep_success() + await self._sleep_success() return response_json - @asyncio.coroutine - def _api(self, path=None, url=None): + async def _api(self, path=None, url=None): ''' @raise APIJSONError @raise APILimitError @@ -122,13 +120,13 @@ def _api(self, path=None, url=None): ''' if url is None: url = 'https://api.instagram.com/v1/{}'.format(path) - response = yield from self._anonymous_session.get( + response = await self._anonymous_session.get( url, params={ 'client_id': self._client_id, }, ) - response = yield from response.text() + response = await response.text() try: response = json.loads(response) except ValueError as e: @@ -136,12 +134,11 @@ def _api(self, path=None, url=None): 'Bad response for {}: {} Response: {}' .format(url, e, response), ) - yield from self._check_api_response(response) - yield from self._sleep_success() + await self._check_api_response(response) + await self._sleep_success() return response - @asyncio.coroutine - def _check_api_response(self, response): + async def _check_api_response(self, response): ''' @raise APIJSONError @raise APILimitError @@ -158,22 +155,21 @@ def _check_api_response(self, response): if code == 400: raise APINotAllowedError(message) elif code in (403, 429): - yield from self._sleep_limit() + await self._sleep_limit() raise APILimitError(message) else: raise APIJSONError(message) - @asyncio.coroutine - def _do_login(self): + async def _do_login(self): ''' @raise APIJSONError @raise APILimitError @raise APINotAllowedError @raise APIError ''' - yield from self._open(BASE_URL) + await self._open(BASE_URL) self._update_csrf_token() - yield from self._ajax( + await self._ajax( 'accounts/login/ajax/', data={ 'username': self._username, @@ -183,8 +179,7 @@ def _do_login(self): self._update_csrf_token() self.id = self._session.cookies['ds_user_id'].value - @asyncio.coroutine - def follow(self, user): + async def follow(self, user): ''' @raise APIJSONError @raise APILimitError @@ -193,7 +188,7 @@ def follow(self, user): @raise APIError ''' try: - yield from self._ajax( + await self._ajax( 'web/friendships/{}/follow/'.format(user.instagram_id), referer=user.get_url(), ) @@ -210,42 +205,39 @@ def follow(self, user): else: LOGGER.debug('{} was followed'.format(user.username)) - @asyncio.coroutine - def get_followed(self, user): + async def get_followed(self, user): ''' @raise APIJSONError @raise APILimitError @raise APINotAllowedError @raise APIError ''' - response = yield from self._api( + response = await self._api( 'users/{}/follows'.format(user.instagram_id), ) followed = response['data'] next_url = response['pagination'].get('next_url') while next_url: - response = yield from self._api(url=next_url) + response = await self._api(url=next_url) followed.extend(response['data']) next_url = response['pagination'].get('next_url') LOGGER.debug('{} followed users were fetched'.format(len(followed))) return followed - @asyncio.coroutine - def get_some_followers(self, user): + async def get_some_followers(self, user): ''' @raise APIJSONError @raise APILimitError @raise APINotAllowedError @raise APIError ''' - response = yield from self._api( + response = await self._api( 'users/{}/followed-by'.format(user.instagram_id), ) followers = response['data'] return followers - @asyncio.coroutine - def like(self, media): + async def like(self, media): ''' @raise APIError @raise APIJSONError @@ -254,7 +246,7 @@ def like(self, media): @raise APINotFoundError ''' try: - yield from self._ajax('web/likes/{}/like/'.format(media)) + await self._ajax('web/likes/{}/like/'.format(media)) except APILimitError as e: raise APILimitError( 'API limit was reached during liking {}. {}'.format(media, e), @@ -262,43 +254,38 @@ def like(self, media): else: LOGGER.debug('Liked {}'.format(media)) - @asyncio.coroutine - def _open(self, url): + async def _open(self, url): headers = { 'Referer': self._referer, } - response = yield from self._session.get(url, headers=headers) + response = await self._session.get(url, headers=headers) self._referer = url - response = yield from response.text() + response = await response.text() return response - @asyncio.coroutine - def relogin(self): - yield from self._session.close() + async def relogin(self): + await self._session.close() self._session.cookies.clear() - yield from self._do_login() + await self._do_login() - @asyncio.coroutine - def _sleep_limit(self): + async def _sleep_limit(self): LOGGER.debug( 'Sleeping for {:.0f} sec because of API limits' .format(self._limit_sleep_time), ) - yield from asyncio.sleep(self._limit_sleep_time) + await asyncio.sleep(self._limit_sleep_time) self._limit_sleep_time *= self._limit_sleep_time_coefficient - @asyncio.coroutine - def _sleep_success(self): + async def _sleep_success(self): if self._limit_sleep_time != self._limit_sleep_time_min: self._limit_sleep_time = self._limit_sleep_time_min self._success_sleep_time = self._success_sleep_time_max - yield from asyncio.sleep(self._success_sleep_time) + await asyncio.sleep(self._success_sleep_time) self._success_sleep_time = self._success_sleep_time_min + \ (self._success_sleep_time - self._success_sleep_time_min) * \ self._success_sleep_time_coefficient - @asyncio.coroutine - def unfollow(self, user): + async def unfollow(self, user): ''' @raise APIError @raise APIJSONError @@ -307,7 +294,7 @@ def unfollow(self, user): @raise APINotFoundError ''' try: - response = yield from self._ajax( + response = await self._ajax( 'web/friendships/{}/unfollow/'.format(user.instagram_id), referer=user.get_url(), ) diff --git a/instabot/like_service.py b/instabot/like_service.py index 652473b..814588a 100644 --- a/instabot/like_service.py +++ b/instabot/like_service.py @@ -14,23 +14,22 @@ def __init__(self, client, media_service): self._media_service = media_service self._stats_service = StatsService.get_instance() - @asyncio.coroutine - def run(self): - media = yield from self._media_service.pop() + async def run(self): + media = await self._media_service.pop() while True: try: - yield from self._client.like(media) + await self._client.like(media) except APILimitError as e: LOGGER.debug(e) except (APIError, APIJSONError) as e: LOGGER.debug(e) - yield from asyncio.sleep(5) + await asyncio.sleep(5) except (APINotAllowedError, APINotFoundError) as e: LOGGER.debug('Can\'t like {}. {}'.format(media, str(e))) - media = yield from self._media_service.pop() + media = await self._media_service.pop() except (IOError, OSError, ClientResponseError) as e: LOGGER.warning(e) - yield from asyncio.sleep(5) + await asyncio.sleep(5) else: - media = yield from self._media_service.pop() + media = await self._media_service.pop() self._stats_service.increment('liked') diff --git a/instabot/media_service.py b/instabot/media_service.py index e7a04d4..583a090 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -20,13 +20,12 @@ def __init__(self, configuration): self._media = [] self._session = ClientSession() - @asyncio.coroutine - def _get_media_by_hashtag(self, hashtag): + async def _get_media_by_hashtag(self, hashtag): url = 'https://www.instagram.com/explore/tags/{}/'.format( urllib.parse.quote(hashtag.encode('utf-8')), ) - response = yield from self._session.get(url) - response = yield from response.read() + response = await self._session.get(url) + response = await response.read() response = response.decode('utf-8', errors='ignore') match = re.search( r'', + response, + ) + if match is None: + raise APIError('Can\'t find JSON in the response: {}', response) + try: + response = json.loads(match.group(1)) + except ValueError as e: + raise APIError('Can\'t parse response JSON: {}'.format(e)) + try: + media = response['entry_data']['TagPage'][0]['tag'] + media = media['media']['nodes'] + media = [media_item['id'] for media_item in media] + except (KeyError, TypeError) as e: + raise APIError( + 'Can\'t obtain media from response JSON: {}'.format(e), + ) + LOGGER.debug( + '{} media about \"{}\" were fetched'.format(len(media), hashtag), + ) + return media + async def get_some_followers(self, user): """Fetches some amount of followers of given user. diff --git a/instabot/media_service.py b/instabot/media_service.py index 583a090..fe92b07 100644 --- a/instabot/media_service.py +++ b/instabot/media_service.py @@ -1,11 +1,7 @@ import asyncio import itertools -import json import logging -import re -import urllib.parse -from .errors import ConfigurationError, MediaError -from aiohttp import ClientSession +from .errors import APIError, ConfigurationError from aiohttp.errors import ClientResponseError LOGGER = logging.getLogger('instabot.media_service') @@ -13,49 +9,24 @@ class MediaService: - def __init__(self, configuration): + def __init__(self, client, configuration): self._hashtags = configuration.hashtags if len(self._hashtags) == 0: raise ConfigurationError('No hashtags were specified') self._media = [] - self._session = ClientSession() - - async def _get_media_by_hashtag(self, hashtag): - url = 'https://www.instagram.com/explore/tags/{}/'.format( - urllib.parse.quote(hashtag.encode('utf-8')), - ) - response = await self._session.get(url) - response = await response.read() - response = response.decode('utf-8', errors='ignore') - match = re.search( - r'', - response, - ) - if match is None: - raise MediaError() - response = json.loads(match.group(1)) - media = response['entry_data']['TagPage'][0]['tag']['media']['nodes'] - media = [m['id'] for m in media] - LOGGER.debug( - '{} media about \"{}\" were fetched'.format(len(media), hashtag), - ) - return media + self._client = client async def run(self): for hashtag in itertools.cycle(self._hashtags): - if len(self._media) < MEDIA_COUNT_MIN: - try: - self._media.extend( - (await self._get_media_by_hashtag(hashtag)), - ) - except (IOError, OSError, ClientResponseError, MediaError) \ - as e: - LOGGER.warning(e) - await asyncio.sleep(5) - else: - await asyncio.sleep(3) + try: + media = await self._client.get_media_by_hashtag(hashtag) + except (APIError, ClientResponseError, IOError, OSError) as e: + LOGGER.warning(e) + await asyncio.sleep(5) else: + self._media.extend(media) + await asyncio.sleep(3) + while len(self._media) >= MEDIA_COUNT_MIN: await asyncio.sleep(30) async def pop(self): diff --git a/instabot/user_service.py b/instabot/user_service.py index 4a9e41f..1df87bd 100644 --- a/instabot/user_service.py +++ b/instabot/user_service.py @@ -32,7 +32,7 @@ async def run(self): LOGGER.warning(e) await asyncio.sleep(5) else: - await asyncio.sleep(60) + await asyncio.sleep(60 * 5) async def _ensure_enough_users(self): users_to_follow_count = User.select() \ From 77b7a66f3c3f68012028222c7b582050053ff350 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Fri, 31 Mar 2017 23:56:33 +0300 Subject: [PATCH 44/48] Bumping version number to 0.3.5. Error response about temporary blocking is JSON encoded now. --- instabot/instabot.py | 2 +- instabot/instagram.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/instabot/instabot.py b/instabot/instabot.py index fee506f..af5b091 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -28,7 +28,7 @@ CONFIGURATION Path to configuration.yml file. ''' LOGGER = logging.getLogger('instabot') -__version__ = '0.3.4' +__version__ = '0.3.5' def install(configuration, db): diff --git a/instabot/instagram.py b/instabot/instagram.py index ff6387b..380fb7f 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -10,8 +10,8 @@ BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot.instagram') -USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:48.0) ' \ - 'Gecko/20100101 Firefox/48.0' +USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) ' \ + 'Gecko/20100101 Firefox/52.0' class Client: @@ -88,11 +88,6 @@ async def _ajax(self, url, data=None, referer=None): try: response_json = json.loads(text) except ValueError as e: - if 'too many requests' in text or 'temporarily blocked' in text: - await self._sleep_limit() - raise APILimitError( - 'Too many AJAX requests. URL: {}'.format(url), - ) message = 'AJAX request to {url} is not JSON: {error} ' \ 'Response ({status}): \"{text}\"'.format( url=url, @@ -112,6 +107,12 @@ async def _ajax(self, url, data=None, referer=None): raise APIError(message) status = response_json.get('status') if status == 'fail': + message = response_json.get('message') + if isinstance(message, str) and 'temporarily blocked' in message: + await self._sleep_limit() + raise APILimitError( + 'Too many AJAX requests. URL: {}'.format(url), + ) raise APIFailError( 'AJAX request to {} was failed: {}'.format(url, response_json), ) @@ -121,7 +122,7 @@ async def _ajax(self, url, data=None, referer=None): ) LOGGER.debug('Request: {url} Response: {response}'.format( url=url, - response=response_json + response=response_json, )) await self._sleep_success() return response_json From 1e5aa0b0584944efbf1ce6b2463eb9c373ccd96b Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Sun, 9 Apr 2017 16:58:12 +0300 Subject: [PATCH 45/48] Logging was extended in case of temporary block. --- instabot/instagram.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/instabot/instagram.py b/instabot/instagram.py index 380fb7f..40b064c 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -111,7 +111,10 @@ async def _ajax(self, url, data=None, referer=None): if isinstance(message, str) and 'temporarily blocked' in message: await self._sleep_limit() raise APILimitError( - 'Too many AJAX requests. URL: {}'.format(url), + 'AJAX request to {url} was blocked: {response}'.format( + url=url, + response=response_json, + ), ) raise APIFailError( 'AJAX request to {} was failed: {}'.format(url, response_json), From 4209f4b423562cef12444d13b65972966b7616c7 Mon Sep 17 00:00:00 2001 From: Pyotr Ermishkin Date: Thu, 13 Jul 2017 22:43:03 +0300 Subject: [PATCH 46/48] Version number was bumped to 0.4.0. Dockerfile was added. --- Dockerfile | 13 +++++ README.md | 55 +++++++++++--------- docker-entrypoint.sh | 4 ++ instabot/instabot.py | 2 +- instabot/instagram.py | 116 ++++++++++++++++++++---------------------- 5 files changed, 105 insertions(+), 85 deletions(-) create mode 100644 Dockerfile create mode 100755 docker-entrypoint.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0c4c0ce --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.6 +MAINTAINER Pyotr Ermishkin + +COPY instabot /instabot/ +COPY docker-entrypoint.sh / +COPY instabot_runner.py / +COPY requirements.txt / + +VOLUME /configuration + +RUN pip install -r requirements.txt + +CMD ["/docker-entrypoint.sh"] diff --git a/README.md b/README.md index 3b8a37f..41fdc6d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # InstaBot -Instagram bot written in Python 3.5 that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses list of followers of people you have followed. +Instagram bot written in Python 3 that cycles through specified hashtags and automatically likes pictures with those hashtags to get more followers. The bot also follows people and unfollows them after specified period of time. Unfollowed people are saved in DB to prevent following them again. To find new people to follow it uses list of followers of people you have followed. During installation process it saves people followed by you as "followed long time ago" and unfollows them at the first start. @@ -8,12 +8,23 @@ The bot doesn't use Instagram API so all credentials you need are your login and ## Deployment - $ virtualenv --python=/usr/bin/python3 instabotenv - $ cd instabotenv - $ source bin/activate - (instabotenv) $ git clone https://github.com/quasiyoke/InstaBot.git - (instabotenv) $ cd InstaBot - (instabotenv) $ pip install -r requirements.txt +```sh +docker network create \ + --subnet=172.21.0.0/24 \ + instabot +docker run \ + --name=instabot-mysql \ + --net=instabot \ + --ip=172.21.0.2 \ + --env="MYSQL_ROOT_PASSWORD=ZEbMKcFQppk8m8PR3b" \ + --env="MYSQL_DATABASE=instabot" \ + --env="MYSQL_USER=instabot" \ + --env="MYSQL_PASSWORD=KbWj0Eua78YGLNLf3K" \ + --volume=`pwd`/lib:/var/lib/mysql \ + --detach \ + mysql:5.7 +docker build --tag=instabot . +``` Create MySQL DB: @@ -30,10 +41,10 @@ credentials: username: "your_username" password: "eKeFB2;AW6fS}z" db: - host: "localhost" - name: "instagram" + host: "172.21.0.2" + name: "instabot" user: "instabot" - password: "GT8H!b]5,9}A7" + password: "KbWj0Eua78YGLNLf3K" following_hours: 120 hashtags: - I @@ -56,14 +67,6 @@ logging: class: logging.StreamHandler level: DEBUG formatter: simple - file: - class: logging.handlers.RotatingFileHandler - level: DEBUG - formatter: simple - filename: log.log - maxBytes: 10485760 - backupCount: 10 - encoding: utf-8 loggers: instabot: level: DEBUG @@ -81,10 +84,14 @@ Where: * `logging` — logging setup as described in [this howto](https://docs.python.org/3/howto/logging.html). * `users_to_follow_cache_size` — how much users should be fetched for following. The cache is being filled in once a minute. Optional. By default bot won't follow anybody. -Create necessary DB tables: +Now you may run the bot: - (instabotenv) $ ./instabot_runner.py install configuration.yml - -Run: - - (instabotenv) $ ./instabot_runner.py configuration.yml +```sh +docker run \ + --name=instabot \ + --net=instabot \ + --ip=172.21.0.10 \ + --volume=`pwd`/configuration:/configuration \ + --detach \ + instabot +``` diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100755 index 0000000..ce93d04 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +/instabot_runner.py install /configuration/configuration.yml +/instabot_runner.py /configuration/configuration.yml diff --git a/instabot/instabot.py b/instabot/instabot.py index af5b091..f12a5da 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -28,7 +28,7 @@ CONFIGURATION Path to configuration.yml file. ''' LOGGER = logging.getLogger('instabot') -__version__ = '0.3.5' +__version__ = '0.4.0' def install(configuration, db): diff --git a/instabot/instagram.py b/instabot/instagram.py index 40b064c..f9e35ec 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -7,11 +7,11 @@ from .errors import APIError, APILimitError, \ APINotAllowedError, APINotFoundError, APIFailError from aiohttp import ClientSession +from http import HTTPStatus BASE_URL = 'https://www.instagram.com/' LOGGER = logging.getLogger('instabot.instagram') -USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) ' \ - 'Gecko/20100101 Firefox/52.0' +USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0' class Client: @@ -34,7 +34,7 @@ def __init__(self, configuration): self._session = ClientSession( cookies={ 'ig_pr': '1', - 'ig_vw': '1280', + 'ig_vw': '1920', }, headers={ 'User-Agent': USER_AGENT, @@ -64,78 +64,60 @@ async def _ajax(self, url, data=None, referer=None): """ if referer is not None: self._referer = referer - url = BASE_URL + url + url = f'{BASE_URL}{url}' headers = { 'Referer': self._referer, 'X-CSRFToken': self._csrf_token, } - response = await self._session.post( + async with self._session.post( url, data=data, headers=headers, - ) - if response.status == 404: - response.close() - await self._sleep_success() - raise APINotFoundError( - 'AJAX response status code is 404 for {}'.format(url), - ) - elif 500 <= response.status < 600: - response.close() - await self._sleep_success() - raise APIError(response.status) - text = await response.text() - try: - response_json = json.loads(text) - except ValueError as e: - message = 'AJAX request to {url} is not JSON: {error} ' \ - 'Response ({status}): \"{text}\"'.format( - url=url, - error=e, - status=response.status, - text=text, - response=response, - ), - if response.status == 200: - await self._sleep_success() - raise APIError(message) - elif response.status == 400: + ) as response: + if response.status == HTTPStatus.NOT_FOUND: + response.close() await self._sleep_success() - raise APINotAllowedError(message) - else: + raise APINotFoundError(f'AJAX response status code is 404 for {url}') + elif HTTPStatus.INTERNAL_SERVER_ERROR <= response.status: + response.close() await self._sleep_success() - raise APIError(message) - status = response_json.get('status') + raise APIError(response.status) + text = await response.text() + try: + response_dict = json.loads(text) + except ValueError as err: + reason = f'AJAX request to {url} is not JSON: {err} Response ({response.status}): \"{text}\"' + if response.status == HTTPStatus.OK: + await self._sleep_success() + raise APIError(reason) + elif response.status == HTTPStatus.BAD_REQUEST: + await self._sleep_success() + raise APINotAllowedError(reason) + else: + await self._sleep_success() + raise APIError(reason) + status = response_dict.get('status') if status == 'fail': - message = response_json.get('message') + message = response_dict.get('message') if isinstance(message, str) and 'temporarily blocked' in message: await self._sleep_limit() - raise APILimitError( - 'AJAX request to {url} was blocked: {response}'.format( - url=url, - response=response_json, - ), - ) - raise APIFailError( - 'AJAX request to {} was failed: {}'.format(url, response_json), - ) + raise APILimitError(f'AJAX request to {url} was blocked: {response_dict}') + raise APIFailError(f'AJAX request to {url} was failed: {response_dict}') elif status != 'ok': - raise APIError( - 'AJAX request to {} is not OK: {}'.format(url, response_json), - ) - LOGGER.debug('Request: {url} Response: {response}'.format( - url=url, - response=response_json, - )) + raise APIError(f'AJAX request to {url} is not OK: {response_dict}') + LOGGER.debug(f'Request: {url} Response: {response_dict}') await self._sleep_success() - return response_json + return response_dict async def _do_login(self): - """ - @raise APIJSONError - @raise APILimitError - @raise APINotAllowedError - @raise APIError + """Logins client session. + + Raises: + APIJSONError + APILimitError + APINotAllowedError + APIError + """ await self._open(BASE_URL) self._update_csrf_token() @@ -147,7 +129,12 @@ async def _do_login(self): }, ) self._update_csrf_token() - self.id = self._session.cookies['ds_user_id'].value + try: + self.id = self._session.cookies['ds_user_id'].value + except KeyError as err: + reason = 'Can\'t obtain user ID from cookies.' + LOGGER.exception(reason) + raise APIError(reason) from err async def follow(self, user): """ @@ -394,6 +381,15 @@ async def like(self, media): LOGGER.debug('Liked {}'.format(media)) async def _open(self, url): + """Opens given URL (HTTP GET). + + Args: + url (str) + + Returns: + str: Response. + + """ headers = { 'Referer': self._referer, } From 57d3043a414edcd46ee4f309bca85bf779a7795b Mon Sep 17 00:00:00 2001 From: Grigory Starinkin Date: Sat, 13 Jan 2018 20:33:11 +0000 Subject: [PATCH 47/48] fix get media by hashtag search updated to reflect recent instagram changes --- instabot/instagram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/instabot/instagram.py b/instabot/instagram.py index f9e35ec..88709d3 100644 --- a/instabot/instagram.py +++ b/instabot/instagram.py @@ -312,9 +312,9 @@ async def get_media_by_hashtag(self, hashtag): except ValueError as e: raise APIError('Can\'t parse response JSON: {}'.format(e)) try: - media = response['entry_data']['TagPage'][0]['tag'] - media = media['media']['nodes'] - media = [media_item['id'] for media_item in media] + tag = response['entry_data']['TagPage'][0]['graphql']['hashtag'] + edges = tag['edge_hashtag_to_media']['edges'] + media = [edge['node']['id'] for edge in edges] except (KeyError, TypeError) as e: raise APIError( 'Can\'t obtain media from response JSON: {}'.format(e), From 5426f5cabae61f90ffbac842dc5ec63710d1db02 Mon Sep 17 00:00:00 2001 From: Petr Ermishkin Date: Sun, 14 Jan 2018 02:52:17 +0300 Subject: [PATCH 48/48] Version number was bumped to 0.4.1 --- instabot/instabot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instabot/instabot.py b/instabot/instabot.py index f12a5da..a9c1adf 100644 --- a/instabot/instabot.py +++ b/instabot/instabot.py @@ -28,7 +28,7 @@ CONFIGURATION Path to configuration.yml file. ''' LOGGER = logging.getLogger('instabot') -__version__ = '0.4.0' +__version__ = '0.4.1' def install(configuration, db):