diff --git a/core/forecast/infra/services/forecast.py b/core/forecast/infra/services/forecast.py index 5755abe..3948909 100644 --- a/core/forecast/infra/services/forecast.py +++ b/core/forecast/infra/services/forecast.py @@ -2,12 +2,13 @@ from core.forecast.infra.models import Forecast from core.occurrences.infra.models import Occurrence from sklearn.ensemble import RandomForestClassifier -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import train_test_split from sklearn.calibration import CalibratedClassifierCV from imblearn.over_sampling import SMOTE - +from imblearn.combine import SMOTEENN import pandas as pd +import numpy as np def runForecast(repo: MachineLearningRepository): # Aqui é onde realmente acontece a previsão com IA # Treinamento @@ -51,28 +52,29 @@ def runForecast(repo: MachineLearningRepository): # Aqui é onde realmente acont df["flood"] = df.apply( lambda row: 1 if ((occurrences["neighborhood"] == row["neighborhood"]) & (occurrences["date"] == row["date"])).any() or - (row.rain > 10 and row.humidity > 60 and row.elevation < 10) + (row.rain > 1.5 and row.humidity > 50 and row.elevation < 15) else 0, axis=1 ) - print(df["flood"]) + new_df = df.copy() + new_df[["rain", "temperature", "humidity", "pressure"]] += np.random.normal(0, 0.075, new_df[["rain", "temperature", "humidity", "pressure"]].shape) features = ["rain", "temperature", "humidity", "pressure", "elevation"] X = df[features].values Y = df["flood"].values # Aqui ele vai preencher os climas que estiverem sem registro de alagamento com NaN - scaler = StandardScaler() # Padronizador + scaler = MinMaxScaler() # Padronizador X_scaled = scaler.fit_transform(X) # Treinar com base em X - X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.25) # Devolve variáveis de teste e de treinamento da IA com base no X padrão + X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.25, random_state=42) # Devolve variáveis de teste e de treinamento da IA com base no X padrão rf = RandomForestClassifier( - n_estimators=1000, + n_estimators=500, max_depth=None, - random_state=None, + random_state=42, max_features="sqrt", - class_weight="balanced" + class_weight={0: 1, 1: 1000} ) clf = CalibratedClassifierCV(rf, cv=3, method="isotonic") - smote = SMOTE() + smote = SMOTEENN(smote=SMOTE(sampling_strategy=0.75, k_neighbors=15), random_state=42) X_res, Y_res = smote.fit_resample(X_train, Y_train) clf.fit(X_res, Y_res) @@ -83,10 +85,15 @@ def runForecast(repo: MachineLearningRepository): # Aqui é onde realmente acont X_future = df_future[features].values X_future_scaled = scaler.transform(X_future) + X_future_scaled += np.random.normal(0, 0.0075, X_future_scaled.shape) Y_predict = clf.predict(X_future_scaled) Y_proba = clf.predict_proba(X_future_scaled)[:, 1] + df_future = df_future.reset_index(drop=True) + df_future['probability'] = Y_proba + df_future['prob_percentile'] = df_future['probability'].rank(pct=True) + for i, row in enumerate(df_future.itertuples(index=False)): Forecast.objects.update_or_create( date = row.date, diff --git a/core/weather/infra/repository.py b/core/weather/infra/repository.py index d7920b6..f38cb5f 100644 --- a/core/weather/infra/repository.py +++ b/core/weather/infra/repository.py @@ -2,7 +2,7 @@ from core.weather.infra.models import Weather from core.weather.infra.services.weather import fillClimate as fillClimateService, fillElevation, fillFutureClimate, fillFlood import pandas as pd -from datetime import date, timedelta +from datetime import date, timedelta, datetime today = date.today() forecast_start = today - timedelta(days=3) @@ -10,7 +10,12 @@ class WeatherRepositoryImpl(WeatherRepository): def fillAll(self, lat, lon, neighborhood, start, end): - weather = self.fillWeather(lat, lon, start, min(end, today)) + if isinstance(start, str): + start = datetime.strptime(start, "%Y-%m-%d").date() + if isinstance(end, str): + end = datetime.strptime(end, "%Y-%m-%d").date() + end = end + weather = self.fillWeather(lat, lon, start, today) future = self.fillFutureWeather(lat, lon) flood = self.fillFlood(lat, lon) elevation = self.fillElevation(lat, lon) @@ -18,26 +23,30 @@ def fillAll(self, lat, lon, neighborhood, start, end): all_days = weather["days"] + future["days"] for i, day in enumerate(all_days): - if i > len(weather["days"]): - source = future - idx = i - len(weather["days"]) - else: + if i < len(weather["days"]): source = weather idx = i + else: + source = future + idx = i - len(weather["days"]) + + def safe_get(arr, idx, default=0): + if arr is None: + return default + return arr[idx] if idx < len(arr) else default - for i in range(len(weather["days"])): # para cada dia climates.append( Weather( date=day, neighborhood=neighborhood, latitude=lat, longitude=lon, - rain=source.get("rain", [None]*len(weather["days"]))[i] or future.get("rain", [None]*len(future["days"]))[i] or 0, # se não tiver "rain", retorne None para cada dia - temperature=source.get("temperature", [None]*len(source["days"]))[i] or future.get("temperature", [None]*len(future["days"]))[i] or 0, - humidity=source.get("humidity", [None]*len(source["days"]))[i] or future.get("humidity", [None]*len(future["days"]))[i] or 0, + rain=safe_get(source.get("rain"), idx, safe_get(future.get("rain"), i)), # se não tiver "rain", retorne None para cada dia + temperature=safe_get(source.get("temperature"), idx, safe_get(future.get("temperature"), i)), + humidity=safe_get(source.get("humidity"), idx, safe_get(future.get("humidity"), i)), elevation=elevation.get("elevation"), - pressure=source.get("pressure", [None]*len(weather["days"]))[i] or future.get("pressure", [None]*len(future["days"]))[i] or 0, - river_discharge=flood.get("river_discharge", [None]*len(weather["days"]))[i] or 0 + pressure=safe_get(source.get("pressure"), idx, safe_get(future.get("pressure"), i)), + river_discharge=safe_get(flood.get("river_discharge"), idx) ) ) diff --git a/core/weather/infra/services/weather.py b/core/weather/infra/services/weather.py index 3b5045f..1ca6b48 100644 --- a/core/weather/infra/services/weather.py +++ b/core/weather/infra/services/weather.py @@ -73,6 +73,13 @@ def fillFutureClimate(lat, lon, start, end, retries=3, wait=60): print(f"Erro inesperado na API: {data}") break + return { + "days": [], + "rain": [], + "temperature": [], + "humidity": [] + } + def fillFlood(lat: float, lon: float): url = f"https://flood-api.open-meteo.com/v1/flood" params = { diff --git a/core/weather/presentation/tasks/fillWeatherTask.py b/core/weather/presentation/tasks/fillWeatherTask.py index 38bfb8d..7cc381b 100644 --- a/core/weather/presentation/tasks/fillWeatherTask.py +++ b/core/weather/presentation/tasks/fillWeatherTask.py @@ -1,9 +1,12 @@ from celery import shared_task from core.weather.app.services import WeatherService from core.weather.infra.repository import WeatherRepositoryImpl +from datetime import datetime @shared_task def fillWeather(lat, lon, neighborhood, start, end): + start = datetime.strptime(start, "%Y-%m-%d").date() if isinstance(start, str) else start + end = datetime.strptime(end, "%Y-%m-%d").date() if isinstance(end, str) else end service = WeatherService(repository=WeatherRepositoryImpl()) result = service.execute(lat, lon, neighborhood, start, end) return result \ No newline at end of file