diff --git a/app/models/models.py b/app/models/models.py
index f236d7f..5b49f3e 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -110,16 +110,20 @@ def predict(self, users_data, timeline_data, path_input_model="app/models/pegabo
         return predicted_proba
 
     def botProbability(self, handle, twitterTimeline, twitterUserData):
+        analise = 0
         try:
             df_timeline = pd.DataFrame.from_dict(twitterTimeline)
-            df_user_data = pd.DataFrame.from_dict(twitterUserData)
+            df_user_data = pd.DataFrame([twitterUserData])
             analise = self.predict(df_user_data, df_timeline)
             self.total = round(analise[0][1]*100, 2)
+
-        except:
-            self.total = -1
+        except Exception as err:
+            raise Exception("Problem(s) probably in predict function") from err
+
         return edict({
             'pegabot_version': 'version-1.0',
             'handle': handle,
-            'total': self.total
+            'total': self.total,
+            'analysis': analise.tolist()
         })
 
diff --git a/app/models/prepare_data.py b/app/models/prepare_data.py
index c59c55e..b7bfb7e 100644
--- a/app/models/prepare_data.py
+++ b/app/models/prepare_data.py
@@ -26,6 +26,9 @@ def prepare_data(self, df_users, df_timeline, path_trendtopics_data = "app/model
         df_users['É bot?'] = ''
 
         #Extrai as informações de retweet
+        if not('tweet_is_retweet' in df_timeline.columns and 'tweet_text' in df_timeline.columns):
+            raise Exception("Problems on tweets")
+
         df_timeline['retweet_tratado'] = df_timeline['tweet_is_retweet'].apply(lambda x: "sim" if (x == 'True' or x == True) else "não")
         df_timeline['tweet_com_rt_tratado'] = df_timeline['tweet_text'].apply(lambda x: "sim" if x.find("RT @") != -1 else "não" )
 
@@ -91,7 +94,7 @@ def reune_rt(retweet,rt):
         df = df_result_merge
 
         #Monta o conjunto de treinamento
-        feature_cols = ['followers_count', 'friends_count', 'Tempo mediano', 'Tempo menor']
+        feature_cols = ['twitter_followers_count', 'twitter_friends_count', 'Tempo mediano', 'Tempo menor']
         x = df[feature_cols]
 
         qtd_hashtags = df['tweet_hashtags'].apply(lambda x: len(x.replace("[","").replace("]","").replace(", \'","$").split("$")))
@@ -105,7 +108,7 @@ def reune_rt(retweet,rt):
 
         #O tamanho do nome e do login
         tam_username = df['handle'].apply(lambda x: len(str(x)))
-        tam_nome = df['name'].apply(lambda x: len(str(x)))
+        tam_nome = df['twitter_user_name'].apply(lambda x: len(str(x)))
         x['Tamanho do username'] = np.array(list(tam_username))
         x['Tamanho do nome'] = np.array(list(tam_nome))
 
diff --git a/app/routes/routes.py b/app/routes/routes.py
index d54fe0f..adc8a37 100644
--- a/app/routes/routes.py
+++ b/app/routes/routes.py
@@ -2,7 +2,7 @@
 from flask import jsonify, request
 from app.models.models import Analises, AnaliseSchema
 from app.services.botometer_service import BotometerService
-
+from concurrent.futures import ThreadPoolExecutor
 
 @app.get("/catch")
 def catch():
@@ -31,3 +31,28 @@ def complete():
 def feedback():
     return jsonify("feedback")
 
+def _requested_profiles():
+    """Return the handles listed in the comma-separated `profiles` query argument."""
+    # A missing argument yields no handles rather than the literal string "None".
+    raw = request.args.get('profiles', '')
+    return [name.strip() for name in raw.split(',') if name.strip()]
+
+@app.get('/multicatches')
+def multicatches():
+    """Run the analysis for every requested handle, one after another."""
+    results = [BotometerService().catch(user) for user in _requested_profiles()]
+    return jsonify(results), 200
+
+@app.get('/multicatchesparallel')
+def multicatches2():
+    """Run the analysis for every requested handle on a thread pool."""
+    users = _requested_profiles()
+
+    def getResult(username):
+        return BotometerService().catch(username)
+
+    with ThreadPoolExecutor(max_workers=10) as pool:
+        # Consuming map() keeps request order and re-raises worker exceptions.
+        results = list(pool.map(getResult, users))
+
+    return jsonify(results), 200
diff --git a/app/services/botometer_service.py b/app/services/botometer_service.py
index 82e6aca..4f6e491 100644
--- a/app/services/botometer_service.py
+++ b/app/services/botometer_service.py
@@ -105,7 +105,11 @@ def check_cache_validity(self, analise, handle):
             db.session.add(analise)
             db.session.commit()
 
-    def botProbability(self, handle, user, timeline):
+    def botProbability(self, handle):
+        """Fetch the user record and timeline for `handle`, then return the BotProbability result."""
         p = BotProbability()
+        user = self.findUserAnalisisByHandle(handle=handle)
+        profile = self.twitter_handler.findByHandle(handle=handle)
+        timeline = self.twitter_handler.getUserTimeline(profile.twitter_id)
         response = p.botProbability(handle=handle, twitterTimeline=timeline, twitterUserData=user)
         return response
\ No newline at end of file