From 0d8e0d483225a1f9aee3665883f7114ff83a08d7 Mon Sep 17 00:00:00 2001 From: mario mendonca Date: Sun, 4 Sep 2022 16:50:45 -0300 Subject: [PATCH 1/4] feat: new endpoint to get many profile catches --- app/routes/routes.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/app/routes/routes.py b/app/routes/routes.py index d54fe0f..dae2bd0 100644 --- a/app/routes/routes.py +++ b/app/routes/routes.py @@ -3,7 +3,6 @@ from app.models.models import Analises, AnaliseSchema from app.services.botometer_service import BotometerService - @app.get("/catch") def catch(): handle = str(request.args.get('profile')) @@ -31,3 +30,15 @@ def complete(): def feedback(): return jsonify("feedback") +@app.get('/multicatches') +def multicatches(): + handle = str(request.args.get('profiles')) + users = handle.split(',') + + results = list() + for user in users: + botometer_service = BotometerService() + response = botometer_service.catch(user) + results.append(response) + + return jsonify(results), 200 From d2d7445b4a478aeb62cbcc8530eb8cd53d99e25b Mon Sep 17 00:00:00 2001 From: mario mendonca Date: Sun, 4 Sep 2022 16:52:33 -0300 Subject: [PATCH 2/4] feat: new endpoint to get many profile catches in parallel --- app/routes/routes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/app/routes/routes.py b/app/routes/routes.py index dae2bd0..adc8a37 100644 --- a/app/routes/routes.py +++ b/app/routes/routes.py @@ -2,6 +2,7 @@ from flask import jsonify, request from app.models.models import Analises, AnaliseSchema from app.services.botometer_service import BotometerService +from concurrent.futures import ThreadPoolExecutor @app.get("/catch") def catch(): @@ -42,3 +43,19 @@ def multicatches(): results.append(response) return jsonify(results), 200 + +@app.get('/multicatchesparallel') +def multicatches2(): + handle = str(request.args.get('profiles')) + users = handle.split(',') + # pool.map() yields results in input order and re-raises any worker exception + + def getResult(username): + botometer_service = 
BotometerService() + response = botometer_service.catch(username) + return response + + with ThreadPoolExecutor(max_workers=10) as pool: + results = list(pool.map(getResult, users)) + + return jsonify(results), 200 From 8d6174fca577146a9fe85bb0c4e40408fb54073c Mon Sep 17 00:00:00 2001 From: Gabriel Augusto Date: Sun, 4 Sep 2022 19:58:18 -0300 Subject: [PATCH 3/4] fix: botProbability function signature --- app/services/botometer_service.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/services/botometer_service.py b/app/services/botometer_service.py index 82e6aca..4f6e491 100644 --- a/app/services/botometer_service.py +++ b/app/services/botometer_service.py @@ -105,7 +105,10 @@ def check_cache_validity(self, analise, handle): db.session.add(analise) db.session.commit() - def botProbability(self, handle, user, timeline): + def botProbability(self, handle): p = BotProbability() + user = self.findUserAnalisisByHandle(handle=handle) + response = self.twitter_handler.findByHandle(handle=handle) + timeline = self.twitter_handler.getUserTimeline(response.twitter_id) response = p.botProbability(handle=handle, twitterTimeline=timeline, twitterUserData=user) return response \ No newline at end of file From 75b6e9af0cee8aa366aadf1b1520e33a859fb050 Mon Sep 17 00:00:00 2001 From: Gabriel Augusto Date: Sun, 4 Sep 2022 20:00:26 -0300 Subject: [PATCH 4/4] fix: rename fields, adjust parameters and identifying some exceptions --- app/models/models.py | 10 +++++++--- app/models/prepare_data.py | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index f236d7f..5b49f3e 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -110,16 +110,20 @@ def predict(self, users_data, timeline_data, path_input_model="app/models/pegabo return predicted_proba def botProbability(self, handle, twitterTimeline, twitterUserData): + analise = 0 try: df_timeline = pd.DataFrame.from_dict(twitterTimeline) - 
df_user_data = pd.DataFrame.from_dict(twitterUserData) + df_user_data = pd.DataFrame([twitterUserData]) analise = self.predict(df_user_data, df_timeline) self.total = round(analise[0][1]*100, 2) + except: - self.total = -1 + raise Exception("Problem(s) probably in predict function") + return edict({ 'pegabot_version': 'version-1.0', 'handle': handle, - 'total': self.total + 'total': self.total, + 'analysis': analise.tolist() }) diff --git a/app/models/prepare_data.py b/app/models/prepare_data.py index c59c55e..b7bfb7e 100644 --- a/app/models/prepare_data.py +++ b/app/models/prepare_data.py @@ -26,6 +26,9 @@ def prepare_data(self, df_users, df_timeline, path_trendtopics_data = "app/model df_users['É bot?'] = '' #Extrai as informações de retweet + if not('tweet_is_retweet' in df_timeline.columns and 'tweet_text' in df_timeline.columns): + raise Exception("Problems on tweets") + df_timeline['retweet_tratado'] = df_timeline['tweet_is_retweet'].apply(lambda x: "sim" if (x == 'True' or x == True) else "não") df_timeline['tweet_com_rt_tratado'] = df_timeline['tweet_text'].apply(lambda x: "sim" if x.find("RT @") != -1 else "não" ) @@ -91,7 +94,7 @@ def reune_rt(retweet,rt): df = df_result_merge #Monta o conjunto de treinamento - feature_cols = ['followers_count', 'friends_count', 'Tempo mediano', 'Tempo menor'] + feature_cols = ['twitter_followers_count', 'twitter_friends_count', 'Tempo mediano', 'Tempo menor'] x = df[feature_cols] qtd_hashtags = df['tweet_hashtags'].apply(lambda x: len(x.replace("[","").replace("]","").replace(", \'","$").split("$"))) @@ -105,7 +108,7 @@ def reune_rt(retweet,rt): #O tamanho do nome e do login tam_username = df['handle'].apply(lambda x: len(str(x))) - tam_nome = df['name'].apply(lambda x: len(str(x))) + tam_nome = df['twitter_user_name'].apply(lambda x: len(str(x))) x['Tamanho do username'] = np.array(list(tam_username)) x['Tamanho do nome'] = np.array(list(tam_nome))