diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..514af5b --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 120 +extend-ignore = E203,W503 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4762030 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,42 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + services: + mysql: + image: mysql:8.0 + env: + MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' + MYSQL_ROOT_HOST: '%' + MYSQL_DATABASE: llm_platform + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Configure database for tests + run: sed -i 's/host=db/host=127.0.0.1/' db.config.example + - name: Run linters + run: | + flake8 --ignore=E501,W504,E261 app.py controllers/ models/ views/ + mypy app.py controllers/ models/ views/ + - name: Run tests + run: pytest --cov=controllers --cov=models diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d777b03 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.11-slim +WORKDIR /app +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt +COPY . . +CMD ["streamlit", "run", "app.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/README.md b/README.md index 4dc2eca..fe4858f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,242 @@ -# MVP -Repository per il Minimum Viable Product della fase PB +# Artificial QI – Minimum Viable Product🧠⚙️ + +Piattaforma sviluppata dal gruppo **7Commits** per la valutazione delle prestazioni di LLM (Large Language Models). 
+ +## 👥 Componenti del Gruppo: +- **Marco Cola** - 2079237 +- **Ruize Lin** - 2068236 +- **Stefano Dal Poz** - 1204683 +- **Giulia Hu** - 2009118 +- **Mattia Piva** - 2008065 +- **Giada Rossi** - 2045353 + +--- + +## 📦 Requisiti + +> Puoi eseguire la webapp **con Docker** (consigliato) oppure **con Python + MySQL** in locale. + +### 🔧 Requisiti comuni + +- [x] **Git** +- [x] File `db.config` (creato da `db.config.example`) + +--- + +### 📦 Requisiti per l’**esecuzione con Docker** (consigliato) + +- [x] Docker ≥ 20.10 +- [x] Docker Compose + +### 🐍 Requisiti per l’**esecuzione con Python** + +- [x] Python ≥ 3.10 +- [x] MySQL ≥ 5.7 +- [x] `pip install -r requirements.txt` + +--- + +## 📁 Clonazione del progetto + +```bash +git clone https://github.com/7Commits/MVP +cd MVP +cp db.config.example db.config + +``` + +## 🐳 Avvio con Docker + +Assicurati che il file db.config contenga: +```bash +[mysql] +host=db +user=root +password= +database=llm_platform +port=3306 +``` + +poi esegui: +```bash +docker compose up -d --build +``` + +infine accedi alla webapp: +[localhost:8501](http://localhost:8501) + + +## 🛑 Arresto dell'app (Docker) + +Per mettere in pausa: + +```bash +docker compose stop +``` + +Per spegnere e rimuovere: + +```bash +docker compose down +``` + +## 🐍 Avvio con Python + MySQL locale + +1. Avvia il tuo server MySQL locale (con porta, user e password compatibili) + +2. Modifica db.config così: +```bash +[mysql] +host=localhost +user=root +password=your_password_here +database=llm_platform +port=3306 +ssl_ca= +``` +3. Installa le dipendenze Python: +```bash +pip install -r requirements.txt +``` + +4. Avvia l'app con: +```bash +python -m streamlit run app.py --server.port 8501 +``` + +# 🧪 Guida all’Uso dell’Applicazione + +## 🏠 Pagina Home + +La pagina iniziale della webapp mostra una descrizione dell’app e un menu laterale con le varie sezioni disponibili. 
+ +--- + +## ⚙️ Configurazione API + +In questa sezione puoi creare dei **preset** di connessione per i vari LLM, inserendo: + +- Nome del preset +- Chiave API +- Endpoint del provider (URL) +- Modello da utilizzare +- Temperatura di generazione + +Puoi salvare, modificare o eliminare preset già configurati. + +--- + +## ❓ Gestione Domande + +Qui puoi gestire le **domande e risposte attese**: + +- Inserire manualmente domanda e risposta, oppure set di domande e risposte +- Modificare o eliminare una voce +- Importare un file `.csv` o `.json` con domande e risposte attese + +### 📄 Esempio di formato richiesto per importazione domande e risposte CSV +Deve includere le colonne 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. +```csv +domanda,risposta_attesa,categoria +"Quanto fa 2+2?","4","Matematica Base" +"Qual è la capitale della Francia?","Parigi","Geografia" +"Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" +``` + +### 📄 Esempio di formato richiesto per importazione domande e risposte JSON +Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. +```json +[ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } +] +``` + +### 📄 Esempio di formato richiesto per importazione set di domande e risposte CSV +Ogni riga deve contenere le colonne name (nome del set), id (ID della domanda), domanda (testo), risposta_attesa e categoria. 
+```csv +name,id,domanda,risposta_attesa,categoria +Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia +Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia +Matematica Base,3,Quanto fa 2+2?,4,Matematica +Matematica Base,4,Quanto fa 10*4?,40,Matematica +``` + + +### 📄 Esempio di formato richiesto per importazione set di domande e risposte JSON + +```json +[ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } +] +``` +#### Note importazione: + +- Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente +- Se un set con lo stesso nome esiste già, verrà saltato +- Solo le domande nuove verranno aggiunte al database +- Le domande esistenti verranno referenziate nei nuovi set + + +## 💬 Supporto tecnico + +In caso di problemi o domande, contattare: + +- 📧 Email: [7commits@gmail.com](mailto:7commits@gmail.com) + +Inserendo eventuali messaggi di errore e una breve descrizione del problema. 
+ + + + + + + + + + diff --git a/app.py b/app.py new file mode 100644 index 0000000..cd80f3e --- /dev/null +++ b/app.py @@ -0,0 +1,41 @@ +import logging + +import streamlit as st + +from views.session_state import initialize_session_state +from views.style_utils import load_css +from utils.startup_utils import setup_logging + +logger = logging.getLogger(__name__) + +setup_logging() +logger.info("Applicazione avviata") + +# Imposta la configurazione della pagina +st.set_page_config( + page_title="LLM Test Evaluation Platform", + page_icon="🧠", + layout="wide", + initial_sidebar_state="expanded" +) +initialize_session_state() + +# Applicazione principale +st.title("🧠 LLM Test Evaluation Platform - Artificial QI") + +# Aggiungi CSS personalizzato e stili globali +load_css() + +# --- Definizione pagine con il nuovo sistema --- +Home = st.Page("views/home.py", title="Home", icon=":material/home:", default=True) +Configurazione_API = st.Page("views/api_configurazione.py", title="Configurazione API", icon=":material/api:") +Gestione_domande = st.Page("views/gestione_domande.py", title="Gestione Domande", icon=":material/construction:") +Gestione_set = st.Page("views/gestione_set.py", title="Gestione Set di Domande", icon=":material/list:") +Esecuzione_test = st.Page("views/esecuzione_test.py", title="Esecuzione Test", icon=":material/rule_settings:") +Visualizza_risultati = st.Page("views/visualizza_risultati.py", + title="Visualizzazione Risultati", + icon=":material/bar_chart:") + +# --- Navigazione --- +pg = st.navigation([Home, Configurazione_API, Gestione_domande, Gestione_set, Esecuzione_test, Visualizza_risultati]) +pg.run() diff --git a/controllers/__init__.py b/controllers/__init__.py new file mode 100644 index 0000000..3d410d6 --- /dev/null +++ b/controllers/__init__.py @@ -0,0 +1,119 @@ +"""Esporta le utilità dei controller per uso esterno.""" + +# Gestione dei preset API +import logging +from .startup_controller import get_initial_state + +from 
.api_preset_controller import ( + load_presets, + refresh_api_presets, + list_presets, + get_preset_by_id, + get_preset_by_name, + validate_preset, + save_preset, + delete_preset, + test_api_connection, +) + +# Operazioni CRUD sulle domande +from .question_controller import ( + load_questions, + refresh_questions, + add_question, + update_question, + delete_question, + get_filtered_questions, + save_question_action, + delete_question_action, + import_questions_action, + get_question_text, + get_question_category, + export_questions_action, +) + +# Gestione dei set di domande +from .question_set_controller import ( + load_sets, + refresh_question_sets, + create_set, + update_set, + delete_set, + prepare_sets_for_view, + export_sets_action, +) + +# Risultati e utilità di valutazione +from .test_controller import ( + load_results, + refresh_results, + import_results_action, + export_results_action, + generate_answer, + evaluate_answer, + run_test, +) + +from .result_controller import ( + get_results, + list_set_names, + list_model_names, + prepare_select_options, +) + +from models.test_result import TestResult + +calculate_statistics = TestResult.calculate_statistics + +# Funzioni di avvio +logger = logging.getLogger(__name__) + + +__all__ = [ + # Preset API + "load_presets", + "refresh_api_presets", + "list_presets", + "get_preset_by_id", + "get_preset_by_name", + "validate_preset", + "save_preset", + "delete_preset", + "test_api_connection", + # Domande + "load_questions", + "refresh_questions", + "add_question", + "update_question", + "delete_question", + "get_filtered_questions", + "save_question_action", + "delete_question_action", + "import_questions_action", + "get_question_text", + "get_question_category", + "export_questions_action", + # Set di domande + "load_sets", + "refresh_question_sets", + "create_set", + "update_set", + "delete_set", + "prepare_sets_for_view", + "export_sets_action", + # Risultati dei test + "load_results", + "refresh_results", + 
"import_results_action", + "export_results_action", + "generate_answer", + "evaluate_answer", + "calculate_statistics", + "run_test", + "get_results", + "list_set_names", + "list_model_names", + "prepare_select_options", + # Avvio + "get_initial_state", +] diff --git a/controllers/__pycache__/__init__.cpython-311.pyc b/controllers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..62c2571 Binary files /dev/null and b/controllers/__pycache__/__init__.cpython-311.pyc differ diff --git a/controllers/__pycache__/api_preset_controller.cpython-311.pyc b/controllers/__pycache__/api_preset_controller.cpython-311.pyc new file mode 100644 index 0000000..825200e Binary files /dev/null and b/controllers/__pycache__/api_preset_controller.cpython-311.pyc differ diff --git a/controllers/__pycache__/openai_client.cpython-311.pyc b/controllers/__pycache__/openai_client.cpython-311.pyc new file mode 100644 index 0000000..71c50f8 Binary files /dev/null and b/controllers/__pycache__/openai_client.cpython-311.pyc differ diff --git a/controllers/__pycache__/question_controller.cpython-311.pyc b/controllers/__pycache__/question_controller.cpython-311.pyc new file mode 100644 index 0000000..019cf3e Binary files /dev/null and b/controllers/__pycache__/question_controller.cpython-311.pyc differ diff --git a/controllers/__pycache__/question_set_controller.cpython-311.pyc b/controllers/__pycache__/question_set_controller.cpython-311.pyc new file mode 100644 index 0000000..16218a3 Binary files /dev/null and b/controllers/__pycache__/question_set_controller.cpython-311.pyc differ diff --git a/controllers/__pycache__/result_controller.cpython-311.pyc b/controllers/__pycache__/result_controller.cpython-311.pyc new file mode 100644 index 0000000..4ddd7e4 Binary files /dev/null and b/controllers/__pycache__/result_controller.cpython-311.pyc differ diff --git a/controllers/__pycache__/startup_controller.cpython-311.pyc 
b/controllers/__pycache__/startup_controller.cpython-311.pyc new file mode 100644 index 0000000..32d5779 Binary files /dev/null and b/controllers/__pycache__/startup_controller.cpython-311.pyc differ diff --git a/controllers/__pycache__/test_controller.cpython-311.pyc b/controllers/__pycache__/test_controller.cpython-311.pyc new file mode 100644 index 0000000..05c9ef1 Binary files /dev/null and b/controllers/__pycache__/test_controller.cpython-311.pyc differ diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py new file mode 100644 index 0000000..9c2be2e --- /dev/null +++ b/controllers/api_preset_controller.py @@ -0,0 +1,168 @@ +"""Utility per la gestione dei preset API.""" + +import logging + +import uuid +from typing import List, Optional, Tuple + +import pandas as pd + +from models.api_preset import APIPreset +from utils.cache import ( + get_api_presets as _get_api_presets, + refresh_api_presets as _refresh_api_presets, +) +from openai import APIConnectionError, APIStatusError, RateLimitError + +from utils import openai_client +logger = logging.getLogger(__name__) + + +def load_presets() -> pd.DataFrame: + """Restituisce i preset API utilizzando la cache.""" + return _get_api_presets() + + +def refresh_api_presets() -> pd.DataFrame: + """Svuota e ricarica la cache dei preset API.""" + return _refresh_api_presets() + + +def list_presets(df: pd.DataFrame | None = None) -> List[dict]: + """Restituisce l'elenco dei preset come lista di dizionari.""" + if df is None: + df = load_presets() + return df.to_dict(orient="records") + + +def get_preset_by_id( + preset_id: str, df: pd.DataFrame | None = None +) -> Optional[dict]: + """Recupera un singolo preset dato il suo ID.""" + if df is None: + df = load_presets() + match = df[df["id"] == preset_id] + if match.empty: + return None + return match.iloc[0].to_dict() + + +def get_preset_by_name( + name: str, df: pd.DataFrame | None = None +) -> Optional[dict]: + """Recupera un singolo 
preset dato il suo nome.""" + if df is None: + df = load_presets() + match = df[df["name"] == name] + if match.empty: + return None + return match.iloc[0].to_dict() + + +def validate_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, str]: + """Valida i dati di un preset prima del salvataggio.""" + name = data.get("name", "").strip() + if not name: + return False, "Il nome del preset non può essere vuoto." + + df = load_presets() + if preset_id: + df = df[df["id"] != preset_id] + if name in df["name"].values: + return False, f"Un preset con nome '{name}' esiste già." + return True, "" + + +def save_preset( + data: dict, preset_id: Optional[str] = None +) -> Tuple[bool, str, pd.DataFrame]: + """Salva un nuovo preset o aggiorna uno esistente.""" + is_valid, message = validate_preset(data, preset_id) + if not is_valid: + return False, message, load_presets() + + df = load_presets() + preset_data = { + "name": data.get("name"), + "provider_name": data.get("provider_name", ""), + "endpoint": data.get("endpoint"), + "api_key": data.get("api_key"), + "model": data.get("model"), + "temperature": float(data.get("temperature", 0.0)), + "max_tokens": int(data.get("max_tokens", 1000)), + } + + if preset_id: + idx = df.index[df["id"] == preset_id] + if not idx.empty: + for key, value in preset_data.items(): + df.loc[idx[0], key] = value + success_message = f"Preset '{preset_data['name']}' aggiornato con successo!" + else: + preset_data["id"] = str(uuid.uuid4()) + df = pd.concat([df, pd.DataFrame([preset_data])], ignore_index=True) + success_message = f"Preset '{preset_data['name']}' creato con successo!" 
+ + presets = [APIPreset(**row) for row in df.to_dict(orient="records")] + APIPreset.save(presets) + updated_df = refresh_api_presets() + return True, success_message, updated_df + + +def delete_preset(preset_id: str) -> Tuple[bool, str, pd.DataFrame]: + """Elimina un preset e ritorna lo stato aggiornato.""" + df = load_presets() + match = df[df["id"] == preset_id] + if match.empty: + return False, "Preset non trovato.", df + + preset_name = match.iloc[0]["name"] + APIPreset.delete(preset_id) + updated_df = refresh_api_presets() + return True, f"Preset '{preset_name}' eliminato.", updated_df + + +def test_api_connection( + api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int +) -> Tuple[bool, str]: + """Testa la connessione all'API LLM con i parametri forniti.""" + + try: + client = openai_client.get_openai_client(api_key=api_key, base_url=endpoint) + except openai_client.ClientCreationError: + return False, "Client API non inizializzato. Controlla chiave API e endpoint." + + try: + response = client.chat.completions.create( + model=model, + messages=[ + { + "role": "user", + "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'", + } + ], + temperature=temperature, + max_tokens=max_tokens, + ) + content = response.choices[0].message.content or "" + if "Connessione riuscita." in content: + return True, "Connessione API riuscita!" + return ( + False, + "Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): " + f"{content[:200]}...", + ) + except APIConnectionError as e: + return False, f"Errore di connessione API: {e}" + except RateLimitError as e: + return False, f"Errore di Rate Limit API: {e}" + except APIStatusError as e: + return ( + False, + "Errore di stato API (es. 
modello '{model}' non valido per l'endpoint '{endpoint}', " + f"autenticazione fallita, quota superata): {e.status_code} - {e.message}", + ) + except Exception as exc: # noqa: BLE001 + return False, ( + f"Errore imprevisto durante il test della connessione: {type(exc).__name__} - {exc}" + ) diff --git a/controllers/question_controller.py b/controllers/question_controller.py new file mode 100644 index 0000000..761dfad --- /dev/null +++ b/controllers/question_controller.py @@ -0,0 +1,169 @@ +"""Controller per la gestione delle domande senza layer di service.""" + +import logging +from typing import IO, Optional, Tuple, List, Dict, Any, Union + +import pandas as pd + +from models.question import Question, question_importer +from utils.cache import ( + get_questions as _get_questions, + refresh_questions as _refresh_questions, +) + +logger = logging.getLogger(__name__) + + +def load_questions() -> pd.DataFrame: + """Restituisce tutte le domande utilizzando la cache.""" + return _get_questions() + + +def refresh_questions() -> pd.DataFrame: + """Svuota e ricarica la cache delle domande.""" + return _refresh_questions() + + +def add_question_if_not_exists( + question_id: str, + domanda: str, + risposta_attesa: str, + categoria: str = "", +) -> bool: + """Aggiunge una domanda solo se non esiste già.""" + + df = load_questions() + if str(question_id) in df["id"].astype(str).values: + return False + + Question.add(domanda, risposta_attesa, categoria, question_id) + refresh_questions() + return True + + +def add_question( + domanda: str, + risposta_attesa: str, + categoria: str = "", + question_id: Optional[str] = None, +) -> str: + """Aggiunge una nuova domanda e aggiorna la cache.""" + qid = Question.add(domanda, risposta_attesa, categoria, question_id) + refresh_questions() + return qid + + +def update_question( + question_id: str, + domanda: Optional[str] = None, + risposta_attesa: Optional[str] = None, + categoria: Optional[str] = None, +) -> bool: + """Aggiorna una 
domanda esistente e ricarica la cache.""" + updated = Question.update(question_id, domanda, risposta_attesa, categoria) + refresh_questions() + return updated + + +def delete_question(question_id: str) -> None: + """Elimina una domanda e aggiorna la cache.""" + Question.delete(question_id) + refresh_questions() + + +def get_filtered_questions(category: Optional[str] = None) -> Tuple[pd.DataFrame, List[str]]: + """Restituisce il ``DataFrame`` filtrato e l'elenco delle categorie.""" + return Question.filter_by_category(category) + + +def save_question_action( + question_id: str, edited_question: str, edited_answer: str, edited_category: str +) -> dict: + """Aggiorna una domanda e restituisce un dizionario con l'esito. + + Restituisce + ----------- + dict + ``{"success": bool, "questions_df": DataFrame | None}`` + + In caso di successo viene anche ricaricata la lista delle domande. + Eventuali errori sollevati da ``update_question`` vengono propagati. + """ + success = update_question( + question_id, + domanda=edited_question, + risposta_attesa=edited_answer, + categoria=edited_category, + ) + questions = refresh_questions() if success else None + return {"success": success, "questions_df": questions} + + +def delete_question_action(question_id: str) -> pd.DataFrame: + """Elimina una domanda e restituisce il ``DataFrame`` aggiornato.""" + delete_question(question_id) + questions = refresh_questions() + return questions + + +def export_questions_action(destination: Union[str, IO[str]]) -> None: + """Esporta tutte le domande nella destinazione fornita.""" + question_importer.export_to_file(destination) + + +def import_questions_action(uploaded_file: IO[str] | IO[bytes]) -> Dict[str, Any]: + """Importa domande da file e restituisce i risultati dell'operazione. + + Parametri + --------- + uploaded_file: file-like + Il file caricato dall'utente. 
+ + Restituisce + ----------- + dict + ``{"questions_df": DataFrame, "imported_count": int, "warnings": list[str]}`` + """ + + if uploaded_file is None: + raise ValueError("Nessun file caricato.") + + result = question_importer.import_from_file(uploaded_file) + if not result.get("success", True): + message = "; ".join(result.get("warnings", [])) + raise ValueError(message) + + questions = refresh_questions() + return { + "questions_df": questions, + "imported_count": result["imported_count"], + "warnings": result.get("warnings", []), + } + + +def get_question_text(question_id: str, questions_df: Optional[pd.DataFrame] = None) -> str: + """Ritorna il testo della domanda dato il suo ID, ricaricando la cache se necessario.""" + df = questions_df if questions_df is not None else load_questions() + if "domanda" not in df.columns: + df = refresh_questions() + if "domanda" not in df.columns: + return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" + row = df[df["id"] == str(question_id)] + if not row.empty: + return row.iloc[0]["domanda"] + return f"ID Domanda: {question_id} (non trovata)" + + +def get_question_category( + question_id: str, questions_df: Optional[pd.DataFrame] = None +) -> str: + """Ritorna la categoria della domanda dato il suo ID, ricaricando la cache se necessario.""" + df = questions_df if questions_df is not None else load_questions() + if "categoria" not in df.columns: + df = refresh_questions() + if "categoria" not in df.columns: + return f"ID Domanda: {question_id} (colonna 'categoria' mancante)" + row = df[df["id"] == str(question_id)] + if not row.empty: + return row.iloc[0]["categoria"] + return f"ID Domanda: {question_id} (non trovata)" diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py new file mode 100644 index 0000000..a89801c --- /dev/null +++ b/controllers/question_set_controller.py @@ -0,0 +1,112 @@ +import logging +from typing import List, Optional, Any, Dict, IO, Union + +import 
pandas as pd + +from models.question_set import QuestionSet, question_set_importer # PersistSetsResult not used +from utils.cache import ( + get_questions as _get_questions, + get_question_sets as _get_question_sets, + refresh_question_sets as _refresh_question_sets, +) +from utils.data_format_utils import ( + build_questions_detail, + format_questions_for_view, +) +logger = logging.getLogger(__name__) + + +def load_sets() -> pd.DataFrame: + """Restituisce tutti i set di domande utilizzando la cache.""" + return _get_question_sets() + + +def refresh_question_sets() -> pd.DataFrame: + """Svuota e ricarica la cache dei set di domande.""" + return _refresh_question_sets() + + +def create_set(name: str, question_ids: Optional[List[str]] = None) -> str: + """Crea un nuovo set di domande e aggiorna la cache.""" + set_id = QuestionSet.create(name, question_ids) + refresh_question_sets() + return set_id + + +def update_set( + set_id: str, + name: Optional[str] = None, + question_ids: Optional[List[str]] = None, +) -> pd.DataFrame: + """Aggiorna un set di domande esistente e ricarica la cache. + + Restituisce il DataFrame aggiornato dei set di domande.""" + QuestionSet.update(set_id, name, question_ids) + return refresh_question_sets() + + +def delete_set(set_id: str) -> pd.DataFrame: + """Elimina un set di domande e aggiorna la cache. 
+ + Restituisce il DataFrame aggiornato dei set di domande.""" + QuestionSet.delete(set_id) + return refresh_question_sets() + + +def export_sets_action(destination: Union[str, IO[str]]) -> None: + """Esporta tutti i set di domande nella destinazione indicata.""" + question_set_importer.export_to_file(destination) + + +def prepare_sets_for_view( + selected_categories: Optional[List[str]] = None, +) -> Dict[str, Any]: + """Prepara le informazioni dei set e delle domande per la vista.""" + try: + questions_df_raw = _get_questions() + sets_df = _get_question_sets() + + questions_df, question_map, categories = format_questions_for_view( + questions_df_raw + ) + + if sets_df.empty: + sets_df = pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] + ) + else: + sets_df = sets_df.copy() + sets_df["questions_detail"] = sets_df["questions"].apply( + lambda q_ids: build_questions_detail(question_map, q_ids) + ) + + filtered_sets_df = sets_df + if selected_categories: + def has_categories(details: List[Dict[str, Any]]) -> bool: + categories_in_set = {d.get("categoria") for d in details} + return all(cat in categories_in_set for cat in selected_categories) + + filtered_sets_df = sets_df[ + sets_df["questions_detail"].apply(has_categories) + ] + + return { + "questions_df": questions_df, + "sets_df": filtered_sets_df, + "raw_sets_df": sets_df, + "categories": categories, + } + except Exception as exc: # pragma: no cover - error path + logger.error("Errore nella preparazione dei set: %s", exc) + return { + "questions_df": pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ), + "sets_df": pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] + ), + "raw_sets_df": pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] + ), + "categories": [], + } diff --git a/controllers/result_controller.py b/controllers/result_controller.py new file mode 100644 index 0000000..b650998 --- /dev/null +++ 
b/controllers/result_controller.py @@ -0,0 +1,100 @@ +import logging +from typing import Dict + +import pandas as pd + +from .test_controller import load_results +from .question_set_controller import load_sets +from .api_preset_controller import load_presets + +logger = logging.getLogger(__name__) + + +def get_results(filter_set: str | None, filter_model: str | None) -> pd.DataFrame: + """Carica i risultati e applica eventuali filtri per set e modello LLM.""" + df = load_results() + + if filter_set: + sets_df = load_sets() + set_ids = ( + sets_df[sets_df["name"] == filter_set]["id"].astype(str).tolist() + if not sets_df.empty + else [] + ) + df = df[df["set_id"].astype(str).isin(set_ids)] + + if filter_model: + presets_df = load_presets() + preset_models: Dict[str, str] = ( + presets_df.set_index("name")["model"].to_dict() + if not presets_df.empty + else {} + ) + + def matches_model(res: dict) -> bool: + model = res.get("generation_llm") + if not model: + preset_name = res.get("generation_preset") + model = preset_models.get(preset_name) if preset_name else None + return model == filter_model + + df = df[df["results"].apply(matches_model)] + + return df + + +def list_set_names(results_df: pd.DataFrame, question_sets_df: pd.DataFrame) -> list[str]: + """Elenca i nomi dei set disponibili nei risultati.""" + if results_df.empty or question_sets_df.empty: + return [] + set_name_map = { + str(row["id"]): row["name"] + for row in question_sets_df.to_dict("records") + } + names = {set_name_map.get(str(sid), "Set Sconosciuto") for sid in results_df["set_id"]} + return sorted(names) + + +def list_model_names(results_df: pd.DataFrame) -> list[str]: + """Elenca i nomi dei modelli LLM presenti nei risultati.""" + if results_df.empty: + return [] + presets_df = load_presets() + preset_models: Dict[str, str] = ( + presets_df.set_index("name")["model"].to_dict() if not presets_df.empty else {} + ) + models = set() + for res in results_df["results"]: + model = 
res.get("generation_llm") + if not model and res.get("generation_preset"): + model = preset_models.get(res.get("generation_preset")) + if model: + models.add(model) + return sorted(models) + + +def prepare_select_options( + results_df: pd.DataFrame, question_sets_df: pd.DataFrame +) -> Dict[str, str]: + """Prepara le opzioni del selectbox dei risultati.""" + if results_df.empty: + return {} + set_name_map = { + str(row["id"]): row["name"] + for row in question_sets_df.to_dict("records") + } + processed = [] + for _, row in results_df.iterrows(): + result_data = row["results"] + set_name = set_name_map.get(str(row["set_id"]), "Set Sconosciuto") + avg_score = result_data.get("avg_score", 0) + method = result_data.get("method", "N/A") + method_icon = "🤖" if method == "LLM" else "📊" + processed.append( + { + "id": row["id"], + "display_name": f"{row['timestamp']} - {method_icon} {set_name} (Avg: {avg_score:.2f}%) - {method}", + } + ) + processed.sort(key=lambda x: x["display_name"].split(" - ")[0], reverse=True) + return {p["id"]: p["display_name"] for p in processed} diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py new file mode 100644 index 0000000..99e0389 --- /dev/null +++ b/controllers/startup_controller.py @@ -0,0 +1,23 @@ +import logging + +from utils.cache import get_questions, get_question_sets, get_results +from utils.startup_utils import ( + DefaultConfig, + initialize_database, + load_default_config, + # setup_logging, +) + +logger = logging.getLogger(__name__) + + +def get_initial_state() -> dict[str, object]: + """Restituisce lo stato predefinito dell'applicazione.""" + initialize_database() + defaults: DefaultConfig = load_default_config() + cached_data: dict[str, object] = { + "questions": get_questions(), + "question_sets": get_question_sets(), + "results": get_results(), + } + return {**cached_data, **defaults} diff --git a/controllers/test_controller.py b/controllers/test_controller.py new file mode 100644 index 
# --- controllers/test_controller.py ---
"""Funzioni per la gestione dei test e della valutazione tramite LLM."""

from __future__ import annotations

import json
import logging
from datetime import datetime
from typing import Any, Dict, IO, List, Tuple, Union

import pandas as pd
from openai import APIConnectionError, APIStatusError, RateLimitError

from models.test_result import TestResult, test_result_importer
from models.question import Question
from utils import openai_client

DEFAULT_MODEL = openai_client.DEFAULT_MODEL

logger = logging.getLogger(__name__)


def load_results() -> pd.DataFrame:
    """Return the cached test results as a DataFrame."""
    return TestResult.load_all_df()


def refresh_results() -> pd.DataFrame:
    """Clear and reload the test-results cache."""
    return TestResult.refresh_cache()


def import_results_action(
    uploaded_file: IO[str] | IO[bytes],
) -> Tuple[pd.DataFrame, str]:
    """Import results from ``uploaded_file`` and return the refreshed data.

    Parameters
    ----------
    uploaded_file:
        File-like object containing the results to import.

    Returns
    -------
    Tuple[pd.DataFrame, str]
        The refreshed results DataFrame and a descriptive message.

    Raises
    ------
    ValueError
        If no file was provided or the file contains invalid data.
    """
    if uploaded_file is None:
        raise ValueError("Nessun file caricato.")

    outcome = test_result_importer.import_from_file(uploaded_file)
    return load_results(), outcome["message"]


def export_results_action(destination: Union[str, IO[str]]) -> None:
    """Export the test results to ``destination`` (path or file-like object)."""
    test_result_importer.export_to_file(destination)


def _error_evaluation(message: str) -> Dict[str, Any]:
    """Build a zero-score evaluation payload carrying an error ``message``."""
    return {
        "score": 0,
        "explanation": message,
        "similarity": 0,
        "correctness": 0,
        "completeness": 0,
    }


def generate_answer(question: str, client_config: Dict[str, Any]) -> str:
    """Generate an answer for ``question`` with the configured LLM.

    Returns only the generated answer. Raises ``ValueError`` for invalid
    input or an unconfigured client, ``RuntimeError`` for API failures or
    an invalid API response.
    """
    # Validate the input before building the client: an empty question must be
    # reported as such regardless of the API configuration. (Previously the
    # client was created first, so input errors could surface as config errors.)
    if not isinstance(question, str) or not question.strip():
        logger.error("La domanda fornita è vuota o non valida.")
        raise ValueError("Domanda vuota o non valida")

    api_key = str(client_config.get("api_key", ""))
    try:
        client = openai_client.get_openai_client(
            api_key=api_key,
            base_url=client_config.get("endpoint"),
        )
    except openai_client.ClientCreationError as exc:
        logger.error(
            "Client API per la generazione risposte non configurato: %s", exc
        )
        raise ValueError("Client API non configurato") from exc

    prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}"
    api_request_details = {
        "model": client_config.get("model", DEFAULT_MODEL),
        "messages": [{"role": "user", "content": prompt}],
        "temperature": client_config.get("temperature", 0.7),
        "max_tokens": client_config.get("max_tokens", 500),
    }

    try:
        response = client.chat.completions.create(**api_request_details)
        choices = getattr(response, "choices", None)
        if not choices or not choices[0].message.content:
            raise RuntimeError("Risposta API non valida")
        return choices[0].message.content.strip()
    except (APIConnectionError, RateLimitError, APIStatusError) as e:
        logger.error(
            "Errore API durante la generazione della risposta di esempio: %s - %s",
            type(e).__name__,
            e,
        )
        raise RuntimeError(str(e)) from e
    except RuntimeError:
        # Already our own error type: don't re-log and re-wrap it as unexpected.
        raise
    except Exception as exc:  # noqa: BLE001
        logger.error(
            "Errore imprevisto durante la generazione della risposta: %s - %s",
            type(exc).__name__,
            exc,
        )
        raise RuntimeError(str(exc)) from exc


def evaluate_answer(
    question: str,
    expected_answer: str,
    actual_answer: str,
    client_config: Dict[str, Any],
) -> Dict[str, Any]:
    """Evaluate ``actual_answer`` against ``expected_answer`` via an LLM judge.

    Returns the evaluation as a dict with keys ``score``, ``explanation``,
    ``similarity``, ``correctness`` and ``completeness``. Raises
    ``ValueError`` on configuration or JSON-parsing errors and
    ``RuntimeError`` on API errors.
    """
    api_key = str(client_config.get("api_key", ""))
    try:
        client = openai_client.get_openai_client(
            api_key=api_key,
            base_url=client_config.get("endpoint"),
        )
    except openai_client.ClientCreationError as exc:
        raise ValueError(
            "Errore: Client API per la valutazione non configurato."
        ) from exc

    prompt = f"""
    Sei un valutatore esperto che valuta la qualità delle risposte alle domande.
    Domanda: {question}
    Risposta Attesa: {expected_answer}
    Risposta Effettiva: {actual_answer}

    Valuta la risposta effettiva rispetto alla risposta attesa in base a:
    1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa?
    2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?
    3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?
    Calcola un punteggio complessivo (0-100) basato su queste metriche.
    Fornisci una breve spiegazione della tua valutazione (max 100 parole).
    Formatta la tua risposta come un oggetto JSON con questi campi:
    - score: il punteggio complessivo (numero)
    - explanation: la tua spiegazione (stringa)
    - similarity: punteggio di somiglianza (numero)
    - correctness: punteggio di correttezza (numero)
    - completeness: punteggio di completezza (numero)
    Esempio di risposta JSON:
    {{
        "score": 95,
        "explanation": "La risposta è corretta e completa",
        "similarity": 90,
        "correctness": 100,
        "completeness": 95
    }}
    """

    api_request_details = {
        "model": client_config.get("model", DEFAULT_MODEL),
        "messages": [{"role": "user", "content": prompt}],
        "temperature": client_config.get("temperature", 0.0),
        "max_tokens": client_config.get("max_tokens", 250),
        "response_format": {"type": "json_object"},
    }

    content = ""
    try:
        response = client.chat.completions.create(**api_request_details)
        choices = getattr(response, "choices", None)
        if not choices or not choices[0].message.content:
            logger.error("Risposta API priva di 'choices' validi")
            raise RuntimeError("Risposta API non valida.")
        content = choices[0].message.content
        evaluation = json.loads(content)
        required_keys = (
            "score",
            "explanation",
            "similarity",
            "correctness",
            "completeness",
        )
        if not all(key in evaluation for key in required_keys):
            raise ValueError(f"Risposta JSON incompleta: {content}")
        return evaluation
    except json.JSONDecodeError as e:
        logger.error(
            "Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: %s",
            content,
        )
        raise ValueError(f"Errore di decodifica JSON: {content[:100]}...") from e
    except (APIConnectionError, RateLimitError, APIStatusError) as e:
        logger.error(
            "Errore API durante la valutazione: %s - %s", type(e).__name__, e
        )
        raise RuntimeError(str(e)) from e
    except (RuntimeError, ValueError):
        # Our own, already-descriptive errors: propagate with their type intact
        # (previously they were re-wrapped as a generic RuntimeError and
        # double-logged by the broad handler below).
        raise
    except Exception as exc:  # noqa: BLE001
        logger.error(
            "Errore imprevisto durante la valutazione: %s - %s",
            type(exc).__name__,
            exc,
        )
        raise RuntimeError(str(exc)) from exc


def run_test(
    set_id: str,
    set_name: str,
    question_ids: List[str],
    gen_preset_config: dict[str, Any],
    eval_preset_config: dict[str, Any],
) -> dict[str, Any]:
    """Run a test: generate an answer per question and have an LLM grade it.

    Questions missing from the database or with an empty text are skipped.
    Per-question failures are recorded as zero-score evaluations so the
    report still covers every attempted question. Returns a summary dict
    (result id, scores, per-question details, refreshed results DataFrame)
    or an empty dict when the whole run fails.
    """
    try:
        questions_map = {str(q.id): q for q in Question.load_all()}
        results: Dict[str, Dict[str, Any]] = {}

        for q_id in question_ids:
            q_obj = questions_map.get(str(q_id))
            if not q_obj:
                continue
            question = q_obj.domanda or ""
            if not question.strip():
                continue
            expected = q_obj.risposta_attesa or "Risposta non disponibile"

            try:
                actual_answer = generate_answer(question, gen_preset_config)
            except Exception as e:  # noqa: BLE001
                # Generation failed: store the error text as the "answer".
                actual_answer = str(e)
                evaluation = _error_evaluation(actual_answer)
            else:
                try:
                    evaluation = evaluate_answer(
                        question, expected, actual_answer, eval_preset_config
                    )
                except Exception as e:  # noqa: BLE001
                    evaluation = _error_evaluation(str(e))

            results[str(q_id)] = {
                "question": question,
                "expected_answer": expected,
                "actual_answer": actual_answer,
                "evaluation": evaluation,
            }

        stats = TestResult.calculate_statistics(results)
        result_data = {
            "set_name": set_name,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "avg_score": stats["avg_score"],
            "sample_type": "Generata da LLM",
            "method": "LLM",
            "generation_llm": gen_preset_config.get("model"),
            "evaluation_llm": eval_preset_config.get("model"),
            "questions": results,
            "per_question_scores": stats["per_question_scores"],
            "radar_metrics": stats["radar_metrics"],
        }

        result_id = TestResult.add_and_refresh(set_id, result_data)
        return {
            "result_id": result_id,
            "avg_score": stats["avg_score"],
            "results": results,
            "per_question_scores": stats["per_question_scores"],
            "radar_metrics": stats["radar_metrics"],
            "results_df": TestResult.load_all_df(),
        }
    except Exception as exc:  # noqa: BLE001
        logger.error(
            "Errore durante l'esecuzione del test LLM: %s - %s",
            type(exc).__name__,
            exc,
        )
        return {}


__all__ = [
    "load_results",
    "refresh_results",
    "import_results_action",
    "export_results_action",
    "generate_answer",
    "evaluate_answer",
    "run_test",
]
# --- initialize_db.py ---
"""Standalone script that creates the application's database schema."""

import logging

from utils.startup_utils import setup_logging

logger = logging.getLogger(__name__)

try:
    from models.database import DatabaseEngine
except ModuleNotFoundError as exc:
    logger.error(
        "Modulo mancante. Installa le dipendenze con 'pip install -r requirements.txt'"
    )
    logger.error("Errore specifico: %s", exc)
    # Bare raise re-raises the active exception with its traceback intact
    # (the original `raise exc` needlessly re-raised by name).
    raise

if __name__ == '__main__':
    setup_logging()
    logger.info("Inizializzazione del database...")
    try:
        DatabaseEngine.instance().init_db()
        logger.info("Database inizializzato con successo!")
    except Exception as e:
        logger.error("Errore durante l'inizializzazione del database: %s", e)
        logger.exception("Traccia dettagliata:")


# --- models/__init__.py ---
# Package marker; exposes a module-level logger for the models package.
# import logging
# logger = logging.getLogger(__name__)
# --- models/api_preset.py ---
import logging

from dataclasses import dataclass, asdict
from typing import List
from sqlalchemy import select

from models.database import DatabaseEngine
from models.orm_models import APIPresetORM

logger = logging.getLogger(__name__)


@dataclass
class APIPreset:
    """In-memory representation of a stored LLM API preset."""

    id: str
    name: str
    provider_name: str
    endpoint: str
    api_key: str
    model: str
    temperature: float
    max_tokens: int

    @staticmethod
    def load_all() -> List["APIPreset"]:
        """Read every stored preset from the database."""
        with DatabaseEngine.instance().get_session() as session:
            rows = session.execute(select(APIPresetORM)).scalars().all()
            return [
                APIPreset(
                    id=row.id,
                    name=row.name,
                    provider_name=row.provider_name,
                    endpoint=row.endpoint,
                    api_key=row.api_key,
                    model=row.model,
                    temperature=row.temperature,
                    max_tokens=row.max_tokens,
                )
                for row in rows
            ]

    @staticmethod
    def save(presets: List["APIPreset"]) -> None:
        """Persist ``presets`` as the complete set of stored presets.

        Rows absent from ``presets`` are removed; existing rows are updated
        in place and new ones inserted.
        """
        with DatabaseEngine.instance().get_session() as session:
            stored_ids = set(
                session.execute(select(APIPresetORM.id)).scalars().all()
            )
            kept_ids = {preset.id for preset in presets}

            # Drop rows whose id no longer appears in the incoming list.
            for stale_id in stored_ids - kept_ids:
                stale = session.get(APIPresetORM, stale_id)
                if stale:
                    session.delete(stale)

            for preset in presets:
                row = session.get(APIPresetORM, preset.id)
                if row:
                    # Update every mutable column on the existing row.
                    for column, value in asdict(preset).items():
                        if column != "id":
                            setattr(row, column, value)
                else:
                    session.add(APIPresetORM(**asdict(preset)))
            session.commit()

    @staticmethod
    def delete(preset_id: str) -> None:
        """Remove the preset identified by ``preset_id`` if it exists."""
        with DatabaseEngine.instance().get_session() as session:
            row = session.get(APIPresetORM, preset_id)
            if row:
                session.delete(row)
            session.commit()


# --- models/cached_data.py ---
# NOTE(review): thin cache-facing accessors; their imports (Question,
# QuestionSet, TestResult) are declared at the top of their module.


def get_questions() -> List["Question"]:
    """Return all questions."""
    return Question.load_all()


def get_question_sets() -> List["QuestionSet"]:
    """Return all question sets."""
    return QuestionSet.load_all()


def get_api_presets() -> List[APIPreset]:
    """Return all API presets."""
    return APIPreset.load_all()


def get_results() -> List["TestResult"]:
    """Return all test results."""
    return TestResult.load_all()
+++ b/models/data/api_presets.csv @@ -0,0 +1,4 @@ +id,name,provider_name,endpoint,api_key,model,temperature,max_tokens +6ba759ec-d6ec-4942-a764-0fbb2180771d,test,nan,https://api.openai.com/v1,test,gpt-4o,0.0,1000 +fe0c7c11-a959-4627-8701-8bf33efb7501,12e12e,nan,https://api.openai.com/v1,qweqweqwe,gpt-4o,0.0,1000 +9813c48d-b32c-4f29-b63e-0bc9ed4f693e,1231,nan,https://api.openai.com/v1,nan,gpt-4o,0.0,1000 diff --git a/models/data/basic_math.json b/models/data/basic_math.json new file mode 100644 index 0000000..50b4991 --- /dev/null +++ b/models/data/basic_math.json @@ -0,0 +1,102 @@ +[ + { + "domanda": "Quanto fa 7 + 5?", + "risposta_attesa": "12", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 9 x 3?", + "risposta_attesa": "27", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il quadrato di 6?", + "risposta_attesa": "36", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 100 ÷ 4?", + "risposta_attesa": "25", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è la radice quadrata di 49?", + "risposta_attesa": "7", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 15 - 8?", + "risposta_attesa": "7", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il risultato di 5²?", + "risposta_attesa": "25", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 3 x (2 + 4)?", + "risposta_attesa": "18", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 10 % di 200?", + "risposta_attesa": "20", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il minimo comune multiplo di 4 e 6?", + "risposta_attesa": "12", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa (8 + 2) x 5?", + "risposta_attesa": "50", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 2³?", + "risposta_attesa": "8", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il massimo comune divisore di 18 e 24?", + 
"risposta_attesa": "6", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 0 x 154?", + "risposta_attesa": "0", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il reciproco di 2?", + "risposta_attesa": "1/2", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 1/4 + 1/2?", + "risposta_attesa": "3/4", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 0.1 + 0.2?", + "risposta_attesa": "0.3", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il risultato di -3 + 7?", + "risposta_attesa": "4", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 20 % di 80?", + "risposta_attesa": "16", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è la metà di 1.5?", + "risposta_attesa": "0.75", + "categoria": "Matematica di Base" + } +] diff --git a/models/data/capital_cities.json b/models/data/capital_cities.json new file mode 100644 index 0000000..275f0e0 --- /dev/null +++ b/models/data/capital_cities.json @@ -0,0 +1,102 @@ +[ + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Italia?", + "risposta_attesa": "Roma", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Spagna?", + "risposta_attesa": "Madrid", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale del Regno Unito?", + "risposta_attesa": "Londra", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Grecia?", + "risposta_attesa": "Atene", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Svezia?", + "risposta_attesa": "Stoccolma", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Norvegia?", + "risposta_attesa": "Oslo", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la 
capitale dei Paesi Bassi?", + "risposta_attesa": "Amsterdam", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Danimarca?", + "risposta_attesa": "Copenaghen", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Polonia?", + "risposta_attesa": "Varsavia", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Belgio?", + "risposta_attesa": "Bruxelles", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Svizzera?", + "risposta_attesa": "Berna", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Austria?", + "risposta_attesa": "Vienna", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Finlandia?", + "risposta_attesa": "Helsinki", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Repubblica Ceca?", + "risposta_attesa": "Praga", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale del Portogallo?", + "risposta_attesa": "Lisboa", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Romania?", + "risposta_attesa": "Bucarest", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Ungheria?", + "risposta_attesa": "Budapest", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Bulgaria?", + "risposta_attesa": "Sofia", + "categoria": "Capitali" + } +] diff --git a/models/data/question_sets.csv b/models/data/question_sets.csv new file mode 100644 index 0000000..e9418eb --- /dev/null +++ b/models/data/question_sets.csv @@ -0,0 +1,3 @@ +id,name,questions +75de3c47-f0b9-4958-a3ff-eb5754ffa1c6,Matematica di Base,"[""c64efc53-5a3e-46ea-8293-7ade24b886bb"", ""deafc6ae-decd-4fd0-8d6d-438b90d36e58"", ""372bf717-e147-401e-a477-3705388f73bd"", ""2e43b39f-4e38-4c8e-bf47-16d1797c53cc"", ""5045a59c-6e92-4069-9ae4-14b63d745d0d"", ""4ee6f745-e500-4cfc-affd-103edc8a8c87"", ""1e8974db-17a1-42fd-befa-382b65d0e742"", ""15d2eb56-6bb6-4975-9a91-cb657c4471eb"", 
""c7186103-8dfd-4d25-8873-405b85078a1b"", ""f86e3a38-0a40-4116-ba15-d1708d84e1dd"", ""4fe182fb-5894-46e8-b0a5-f7197af72ee2"", ""05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6"", ""b69ebaa6-1413-46bb-91cd-5585da5a56b4"", ""28ff22b3-2565-4700-b0e9-027dbb4aa862"", ""97a38e3b-b87e-48f9-9360-cfbea3bd822f"", ""40563245-06cc-4956-bddd-248f6418c00d"", ""c136bf44-742d-4493-b26c-63b72ab2279d"", ""42c1bc09-0949-4cdc-b8c2-408f8109d544"", ""2b5d1726-d399-4100-b6a6-abeb46f598ae"", ""481e3cbb-0b18-4139-a2e3-7a723b014414""]" +2bf4c0e3-d6e5-47b3-abb4-f461a07828e3,Capitali,"[""d1570b42-b01c-48c6-9c19-4ed6cf417c90"", ""2554f189-4c9f-4ebe-b475-6be2f4cf6f56"", ""a2608578-e0be-42bf-8060-bec2ed7fa8ec"", ""fd292041-d79d-400b-b333-96c579b80ac5"", ""923fe770-6b7f-45fb-8adb-a891ee87bd70"", ""f758b801-aa8a-4778-ae91-676e67d05148"", ""1f9ab468-f92b-4107-b37d-29c2efd1331b"", ""12714d1c-97a1-41fc-9782-db56b8b57c8e"", ""ff047143-7bc0-4444-bb3e-31c6f5000a8c"", ""4f98a718-03b3-41e6-81f6-90296532138e"", ""4024de79-d256-4b26-be15-8afd769254aa"", ""52b8f680-bb7e-4fc1-8151-191552850771"", ""40894524-2160-4aae-b169-a6966598580b"", ""07cafa28-0259-47d3-9285-9abb455f821e"", ""8a96aa6d-e976-41ac-9b09-2461e9af8af7"", ""371af137-9edb-49ce-977c-06b03dcae599"", ""52ce73ef-67b8-4bcf-8ae6-458a96657bd4"", ""1019288e-6c37-42b8-8a26-4167a1836aa9"", ""742219a0-76cf-4c70-a14f-fa11e0dd6c40"", ""62f15176-ca1d-4cae-9c12-13e943758621""]" diff --git a/models/data/questions.csv b/models/data/questions.csv new file mode 100644 index 0000000..0e086ae --- /dev/null +++ b/models/data/questions.csv @@ -0,0 +1,60 @@ +id,domanda,risposta_attesa,categoria +f051d43f-a099-4dc2-b12c-d400e7903da7,Qual è il quadrato di 6?,36,Matematica di Base +520b3a45-977c-436d-a393-1f7eda68111f,Quanto fa 100 ÷ 4?,25,Matematica di Base +eb44677c-6c3a-4bcc-a43d-16edae7ff1bf,Qual è la radice quadrata di 49?,7,Matematica di Base +a724c198-931c-4abf-814c-100356191f93,Quanto fa 15 - 8?,7,Matematica di Base +e253550c-6e04-423d-b276-1381d79c5af8,Qual è il risultato 
di 5²?,25,Matematica di Base +0fffd5d0-4d48-4b51-be4e-d9e8e8ad2ca5,Quanto fa 3 x (2 + 4)?,18,Matematica di Base +e16786cb-2261-493a-8712-b33ac4b5c142,Quanto fa 10 % di 200?,20,Matematica di Base +514ce7bf-823d-4baf-b9c8-4a00600ea908,Qual è il minimo comune multiplo di 4 e 6?,12,Matematica di Base +865097f0-a89b-4f9c-bdee-c0429ec27b2c,Quanto fa (8 + 2) x 5?,50,Matematica di Base +9f90d803-38f6-4f15-9c51-2d1848602e08,Quanto fa 2³?,8,Matematica di Base +cf805fa2-8fe2-4b58-b213-399136c3c030,Qual è il massimo comune divisore di 18 e 24?,6,Matematica di Base +11a3d746-e565-4458-a4cf-eb2f31f00d09,Quanto fa 0 x 154?,0,Matematica di Base +2e903e0b-bc0f-4a09-805b-1842f1f93cf7,Qual è il reciproco di 2?,1/2,Matematica di Base +23f3e6da-71bd-4ae3-8084-3d3a2d3875e0,Quanto fa 1/4 + 1/2?,3/4,Matematica di Base +de9450e6-6876-4dfa-9fbd-1bbc5a7b26aa,Quanto fa 0.1 + 0.2?,0.3,Matematica di Base +40064003-197c-4163-9a16-86cddbea341c,Qual è il risultato di -3 + 7?,4,Matematica di Base +320626f1-054c-4da8-a4a3-0a8a7662aa3f,Quanto fa 20 % di 80?,16,Matematica di Base +87d176d4-5190-4126-9aaf-9f291cb69007,Qual è la metà di 1.5?,0.75,Matematica di Base +c64efc53-5a3e-46ea-8293-7ade24b886bb,Quanto fa 7 + 5?,12,Matematica di Base +deafc6ae-decd-4fd0-8d6d-438b90d36e58,Quanto fa 9 × 3?,27,Matematica di Base +372bf717-e147-401e-a477-3705388f73bd,Qual è il quadrato di 6?,36,Matematica di Base +2e43b39f-4e38-4c8e-bf47-16d1797c53cc,Quanto fa 100 ÷ 4?,25,Matematica di Base +5045a59c-6e92-4069-9ae4-14b63d745d0d,Qual è la radice quadrata di 49?,7,Matematica di Base +4ee6f745-e500-4cfc-affd-103edc8a8c87,Quanto fa 15 - 8?,7,Matematica di Base +1e8974db-17a1-42fd-befa-382b65d0e742,Qual è il risultato di 5²?,25,Matematica di Base +15d2eb56-6bb6-4975-9a91-cb657c4471eb,Quanto fa 3 × (2 + 4)?,18,Matematica di Base +c7186103-8dfd-4d25-8873-405b85078a1b,Quanto fa 10 % di 200?,20,Matematica di Base +f86e3a38-0a40-4116-ba15-d1708d84e1dd,Qual è il minimo comune multiplo di 4 e 6?,12,Matematica di Base 
+4fe182fb-5894-46e8-b0a5-f7197af72ee2,Quanto fa (8 + 2) × 5?,50,Matematica di Base +05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6,Quanto fa 2³?,8,Matematica di Base +b69ebaa6-1413-46bb-91cd-5585da5a56b4,Qual è il massimo comune divisore di 18 e 24?,6,Matematica di Base +28ff22b3-2565-4700-b0e9-027dbb4aa862,Quanto fa 0 × 154?,0,Matematica di Base +97a38e3b-b87e-48f9-9360-cfbea3bd822f,Qual è il reciproco di 2?,1/2,Matematica di Base +40563245-06cc-4956-bddd-248f6418c00d,Quanto fa 1/4 + 1/2?,3/4,Matematica di Base +c136bf44-742d-4493-b26c-63b72ab2279d,Quanto fa 0.1 + 0.2?,0.3,Matematica di Base +42c1bc09-0949-4cdc-b8c2-408f8109d544,Qual è il risultato di -3 + 7?,4,Matematica di Base +2b5d1726-d399-4100-b6a6-abeb46f598ae,Quanto fa 20 % di 80?,16,Matematica di Base +481e3cbb-0b18-4139-a2e3-7a723b014414,Qual è la metà di 1.5?,0.75,Matematica di Base +d1570b42-b01c-48c6-9c19-4ed6cf417c90,Qual è la capitale della Francia?,Parigi,Capitali +2554f189-4c9f-4ebe-b475-6be2f4cf6f56,Qual è la capitale della Germania?,Berlino,Capitali +a2608578-e0be-42bf-8060-bec2ed7fa8ec,Qual è la capitale dell'Italia?,Roma,Capitali +fd292041-d79d-400b-b333-96c579b80ac5,Qual è la capitale della Spagna?,Madrid,Capitali +923fe770-6b7f-45fb-8adb-a891ee87bd70,Qual è la capitale del Regno Unito?,Londra,Capitali +f758b801-aa8a-4778-ae91-676e67d05148,Qual è la capitale della Grecia?,Atene,Capitali +1f9ab468-f92b-4107-b37d-29c2efd1331b,Qual è la capitale della Svezia?,Stoccolma,Capitali +12714d1c-97a1-41fc-9782-db56b8b57c8e,Qual è la capitale della Norvegia?,Oslo,Capitali +ff047143-7bc0-4444-bb3e-31c6f5000a8c,Qual è la capitale dei Paesi Bassi?,Amsterdam,Capitali +4f98a718-03b3-41e6-81f6-90296532138e,Qual è la capitale della Danimarca?,Copenaghen,Capitali +4024de79-d256-4b26-be15-8afd769254aa,Qual è la capitale della Polonia?,Varsavia,Capitali +52b8f680-bb7e-4fc1-8151-191552850771,Qual è la capitale della Belgio?,Bruxelles,Capitali +40894524-2160-4aae-b169-a6966598580b,Qual è la capitale della 
Svizzera?,Berna,Capitali +07cafa28-0259-47d3-9285-9abb455f821e,Qual è la capitale dell'Austria?,Vienna,Capitali +8a96aa6d-e976-41ac-9b09-2461e9af8af7,Qual è la capitale della Finlandia?,Helsinki,Capitali +371af137-9edb-49ce-977c-06b03dcae599,Qual è la capitale della Repubblica Ceca?,Praga,Capitali +52ce73ef-67b8-4bcf-8ae6-458a96657bd4,Qual è la capitale del Portogallo?,Lisboa,Capitali +1019288e-6c37-42b8-8a26-4167a1836aa9,Qual è la capitale della Romania?,Bucarest,Capitali +742219a0-76cf-4c70-a14f-fa11e0dd6c40,Qual è la capitale dell'Ungheria?,Budapest,Capitali +62f15176-ca1d-4cae-9c12-13e943758621,Qual è la capitale della Bulgaria?,Sofia,Capitali +d6a8a902-920e-41bc-9c2a-ecf7d6980f6a,Di che colore è il cavallo bianco di Napoleone?,Bianco, diff --git a/models/data/test_results.csv b/models/data/test_results.csv new file mode 100644 index 0000000..f9de9a3 --- /dev/null +++ b/models/data/test_results.csv @@ -0,0 +1,2 @@ +id,set_id,timestamp,results +b0ce010f-6f1a-405f-98cc-b46f075c7a6a,75de3c47-f0b9-4958-a3ff-eb5754ffa1c6,2025-06-24 13:22:16,"{""set_name"": ""Matematica di Base"", ""timestamp"": ""2025-06-24 13:22:16"", ""avg_score"": 91.25, ""sample_type"": ""Generata da LLM"", ""method"": ""LLM"", ""generation_preset"": ""t1"", ""evaluation_preset"": ""t1"", ""questions"": {""c64efc53-5a3e-46ea-8293-7ade24b886bb"": {""question"": ""Quanto fa 7 + 5?"", ""expected_answer"": ""12"", ""actual_answer"": ""7 + 5 fa 12."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, corretta e completa. 
Contiene la stessa informazione chiave (il risultato della somma) e utilizza una struttura simile, anche se leggermente pi\u00f9 elaborata."", ""similarity"": 90, ""correctness"": 100, ""completeness"": 95, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 7 + 5?\n Risposta Attesa: 12\n Risposta Effettiva: 7 + 5 fa 12.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, corretta e completa. 
Contiene la stessa informazione chiave (il risultato della somma) e utilizza una struttura simile, anche se leggermente pi\u00f9 elaborata.\"",\n \""similarity\"": 90,\n \""correctness\"": 100,\n \""completeness\"": 95\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 7 + 5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7 + 5 fa 12.""}}, ""deafc6ae-decd-4fd0-8d6d-438b90d36e58"": {""question"": ""Quanto fa 9 \u00d7 3?"", ""expected_answer"": ""27"", ""actual_answer"": ""27"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 9 \u00d7 3?\n Risposta Attesa: 27\n Risposta Effettiva: 27\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 9 \u00d7 3?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""27""}}, ""372bf717-e147-401e-a477-3705388f73bd"": {""question"": ""Qual \u00e8 il quadrato di 6?"", ""expected_answer"": ""36"", ""actual_answer"": ""Il quadrato di 6 \u00e8 36."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 semanticamente simile, corretta e completa rispetto alla risposta attesa. 
Contiene tutte le informazioni chiave e la correttezza \u00e8 garantita."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il quadrato di 6?\n Risposta Attesa: 36\n Risposta Effettiva: Il quadrato di 6 \u00e8 36.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 semanticamente simile, corretta e completa rispetto alla risposta attesa. 
Contiene tutte le informazioni chiave e la correttezza \u00e8 garantita.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il quadrato di 6?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il quadrato di 6 \u00e8 36.""}}, ""2e43b39f-4e38-4c8e-bf47-16d1797c53cc"": {""question"": ""Quanto fa 100 \u00f7 4?"", ""expected_answer"": ""25"", ""actual_answer"": ""100 \u00f7 4 fa **25**."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 perfettamente simile, corretta e completa rispetto alla risposta attesa. Il risultato calcolato \u00e8 25, che corrisponde esattamente all'attesa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 100 \u00f7 4?\n Risposta Attesa: 25\n Risposta Effettiva: 100 \u00f7 4 fa **25**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 perfettamente simile, corretta e completa rispetto alla risposta attesa. Il risultato calcolato \u00e8 25, che corrisponde esattamente all'attesa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 100 \u00f7 4?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""100 \u00f7 4 fa **25**.""}}, ""5045a59c-6e92-4069-9ae4-14b63d745d0d"": {""question"": ""Qual \u00e8 la radice quadrata di 49?"", ""expected_answer"": ""7"", ""actual_answer"": ""7"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa, pertanto il punteggio complessivo \u00e8 al massimo."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 la radice quadrata di 49?\n Risposta Attesa: 7\n Risposta Effettiva: 7\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa, pertanto il punteggio complessivo \u00e8 al massimo.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 la radice quadrata di 49?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7""}}, ""4ee6f745-e500-4cfc-affd-103edc8a8c87"": {""question"": ""Quanto fa 15 - 8?"", ""expected_answer"": ""7"", ""actual_answer"": ""7"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 15 - 8?\n Risposta Attesa: 7\n Risposta Effettiva: 7\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 15 - 8?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7""}}, ""1e8974db-17a1-42fd-befa-382b65d0e742"": {""question"": ""Qual \u00e8 il risultato di 5\u00b2?"", ""expected_answer"": ""25"", ""actual_answer"": ""25"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il risultato di 5\u00b2?\n Risposta Attesa: 25\n Risposta Effettiva: 25\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. 
Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il risultato di 5\u00b2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""25""}}, ""15d2eb56-6bb6-4975-9a91-cb657c4471eb"": {""question"": ""Quanto fa 3 \u00d7 (2 + 4)?"", ""expected_answer"": ""18"", ""actual_answer"": ""12"", ""evaluation"": {""score"": 50, ""explanation"": ""La risposta effettiva \u00e8 semanticamente simile ma errata rispetto alla risposta attesa. 
Non \u00e8 completa perch\u00e9 manca la correttezza."", ""similarity"": 50, ""correctness"": 0, ""completeness"": 50, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 3 \u00d7 (2 + 4)?\n Risposta Attesa: 18\n Risposta Effettiva: 12\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 50,\n \""explanation\"": \""La risposta effettiva \u00e8 semanticamente simile ma errata rispetto alla risposta attesa. 
Non \u00e8 completa perch\u00e9 manca la correttezza.\"",\n \""similarity\"": 50,\n \""correctness\"": 0,\n \""completeness\"": 50\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 3 \u00d7 (2 + 4)?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""12""}}, ""c7186103-8dfd-4d25-8873-405b85078a1b"": {""question"": ""Quanto fa 10 % di 200?"", ""expected_answer"": ""20"", ""actual_answer"": ""10 %."", ""evaluation"": {""score"": 50, ""explanation"": ""La risposta effettiva \u00e8 simile alla risposta attesa nel contesto (stima percentuale), ma \u00e8 mancante del risultato numerico corretto e non \u00e8 completa. La correttezza \u00e8 bassa perch\u00e9 il valore fornito \u00e8 errato."", ""similarity"": 60, ""correctness"": 20, ""completeness"": 30, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 10 % di 200?\n Risposta Attesa: 20\n Risposta Effettiva: 10 %.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 50,\n \""explanation\"": \""La risposta effettiva \u00e8 simile alla risposta attesa nel contesto (stima percentuale), ma \u00e8 mancante del risultato numerico corretto e non \u00e8 completa. La correttezza \u00e8 bassa perch\u00e9 il valore fornito \u00e8 errato.\"",\n \""similarity\"": 60,\n \""correctness\"": 20,\n \""completeness\"": 30\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 10 % di 200?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""10 %.""}}, ""f86e3a38-0a40-4116-ba15-d1708d84e1dd"": {""question"": ""Qual \u00e8 il minimo comune multiplo di 4 e 6?"", ""expected_answer"": ""12"", ""actual_answer"": ""Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel formato. 
\u00c8 semanticamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il minimo comune multiplo di 4 e 6?\n Risposta Attesa: 12\n Risposta Effettiva: Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel formato. 
\u00c8 semanticamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il minimo comune multiplo di 4 e 6?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**.""}}, ""4fe182fb-5894-46e8-b0a5-f7197af72ee2"": {""question"": ""Quanto fa (8 + 2) \u00d7 5?"", ""expected_answer"": ""50"", ""actual_answer"": ""(8 + 2) \u00d7 5 = 10 \u00d7 5 = 50"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa, coprendo tutti i punti chiave."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa (8 + 2) \u00d7 5?\n Risposta Attesa: 50\n Risposta Effettiva: (8 + 2) \u00d7 5 = 10 \u00d7 5 = 50\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa, coprendo tutti i punti chiave.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa (8 + 2) \u00d7 5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""(8 + 2) \u00d7 5 = 10 \u00d7 5 = 50""}}, ""05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6"": {""question"": ""Quanto fa 2\u00b3?"", ""expected_answer"": ""8"", ""actual_answer"": ""2\u00b3 fa **8**."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, correttamente formattata e contenente la stessa informazione chiave. 
\u00c8 corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 2\u00b3?\n Risposta Attesa: 8\n Risposta Effettiva: 2\u00b3 fa **8**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, correttamente formattata e contenente la stessa informazione chiave. 
\u00c8 corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 2\u00b3?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""2\u00b3 fa **8**.""}}, ""b69ebaa6-1413-46bb-91cd-5585da5a56b4"": {""question"": ""Qual \u00e8 il massimo comune divisore di 18 e 24?"", ""expected_answer"": ""6"", ""actual_answer"": ""Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, corrette e completa. Contiene l'informazione chiave (MCD \u00e8 6) in modo chiaro."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il massimo comune divisore di 18 e 24?\n Risposta Attesa: 6\n Risposta Effettiva: Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, corrette e completa. Contiene l'informazione chiave (MCD \u00e8 6) in modo chiaro.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il massimo comune divisore di 18 e 24?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6.""}}, ""28ff22b3-2565-4700-b0e9-027dbb4aa862"": {""question"": ""Quanto fa 0 \u00d7 154?"", ""expected_answer"": ""0"", ""actual_answer"": ""0 \u00d7 154 fa 0."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 perfettamente correta e semanticamente identica alla risposta attesa. 
Contiene tutti i punti chiave richiesti."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 0 \u00d7 154?\n Risposta Attesa: 0\n Risposta Effettiva: 0 \u00d7 154 fa 0.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 perfettamente correta e semanticamente identica alla risposta attesa. 
Contiene tutti i punti chiave richiesti.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 0 \u00d7 154?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0 \u00d7 154 fa 0.""}}, ""97a38e3b-b87e-48f9-9360-cfbea3bd822f"": {""question"": ""Qual \u00e8 il reciproco di 2?"", ""expected_answer"": ""1/2"", ""actual_answer"": ""Il reciproco di 2 \u00e8 1/2."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel significato, quindi ottiene il massimo punteggio in tutte le metriche."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il reciproco di 2?\n Risposta Attesa: 1/2\n Risposta Effettiva: Il reciproco di 2 \u00e8 1/2.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel significato, quindi ottiene il massimo punteggio in tutte le metriche.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il reciproco di 2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il reciproco di 2 \u00e8 1/2.""}}, ""40563245-06cc-4956-bddd-248f6418c00d"": {""question"": ""Quanto fa 1/4 + 1/2?"", ""expected_answer"": ""3/4"", ""actual_answer"": ""1/4 + 1/2 = 3/4."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 1/4 + 1/2?\n Risposta Attesa: 3/4\n Risposta Effettiva: 1/4 + 1/2 = 3/4.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 1/4 + 1/2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""1/4 + 1/2 = 3/4.""}}, ""c136bf44-742d-4493-b26c-63b72ab2279d"": {""question"": ""Quanto fa 0.1 + 0.2?"", ""expected_answer"": ""0.3"", ""actual_answer"": ""0.3"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 0.1 + 0.2?\n Risposta Attesa: 0.3\n Risposta Effettiva: 0.3\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 0.1 + 0.2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0.3""}}, ""42c1bc09-0949-4cdc-b8c2-408f8109d544"": {""question"": ""Qual \u00e8 il risultato di -3 + 7?"", ""expected_answer"": ""4"", ""actual_answer"": ""4"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il risultato di -3 + 7?\n Risposta Attesa: 4\n Risposta Effettiva: 4\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. 
Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il risultato di -3 + 7?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""4""}}, ""2b5d1726-d399-4100-b6a6-abeb46f598ae"": {""question"": ""Quanto fa 20 % di 80?"", ""expected_answer"": ""16"", ""actual_answer"": ""20 %."", ""evaluation"": {""score"": 40, ""explanation"": ""La risposta effettiva \u00e8 incompleta e non fornisce il risultato numerico richiesto. Tuttavia, contiene l'indicazione del calcolo ('20 %'), che \u00e8 semanticamente simile alla risposta attesa. 
La correttezza \u00e8 bassa perch\u00e9 manca il risultato, e la completezza \u00e8 nulla poich\u00e9 non contiene il valore corretto."", ""similarity"": 50, ""correctness"": 20, ""completeness"": 0, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 20 % di 80?\n Risposta Attesa: 16\n Risposta Effettiva: 20 %.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 40,\n \""explanation\"": \""La risposta effettiva \u00e8 incompleta e non fornisce il risultato numerico richiesto. Tuttavia, contiene l'indicazione del calcolo ('20 %'), che \u00e8 semanticamente simile alla risposta attesa. 
La correttezza \u00e8 bassa perch\u00e9 manca il risultato, e la completezza \u00e8 nulla poich\u00e9 non contiene il valore corretto.\"",\n \""similarity\"": 50,\n \""correctness\"": 20,\n \""completeness\"": 0\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 20 % di 80?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""20 %.""}}, ""481e3cbb-0b18-4139-a2e3-7a723b014414"": {""question"": ""Qual \u00e8 la met\u00e0 di 1.5?"", ""expected_answer"": ""0.75"", ""actual_answer"": ""0.75"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi ottiene il punteggio massimo in tutte le metriche."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 la met\u00e0 di 1.5?\n Risposta Attesa: 0.75\n Risposta Effettiva: 0.75\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi ottiene il punteggio massimo in tutte le metriche.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 la met\u00e0 di 1.5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0.75""}}}}" diff --git a/models/database.py b/models/database.py new file mode 100644 index 0000000..d3b9dad --- /dev/null +++ b/models/database.py @@ -0,0 +1,126 @@ +import logging +import threading +import configparser +from pathlib import Path +from typing import Mapping, Optional + +from sqlalchemy import create_engine, text +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker, DeclarativeBase + +logger = logging.getLogger(__name__) + + +class DatabaseEngine: + """Singleton thread-safe che fornisce l'engine del database e le sessioni.""" + + _instance = None + _instance_lock = threading.Lock() + + def __new__(cls, *args, **kwargs): + if cls._instance is not None: + raise RuntimeError( + "DatabaseEngine è un singleton; usa 
class DatabaseEngine:
    """Thread-safe singleton providing the database engine and ORM sessions.

    Obtain the shared object via :meth:`instance`; constructing the class
    directly raises ``RuntimeError`` so every caller shares one connection
    pool.
    """

    _instance = None
    _instance_lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        # Refuse direct construction once the singleton exists.
        if cls._instance is not None:
            raise RuntimeError(
                "DatabaseEngine è un singleton; usa DatabaseEngine.instance()"
            )
        return super().__new__(cls)

    def __init__(self) -> None:
        if self.__class__._instance is not None:
            raise RuntimeError(
                "DatabaseEngine è un singleton; usa DatabaseEngine.instance()"
            )
        # Engine and session factory are created lazily, each guarded by its
        # own lock so session creation does not block engine creation.
        self._engine: Optional[Engine] = None
        self._session_factory: Optional[sessionmaker] = None
        self._engine_lock = threading.Lock()
        self._session_lock = threading.Lock()

    @classmethod
    def instance(cls) -> "DatabaseEngine":
        """Return the shared instance, creating it lazily (double-checked locking)."""
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @classmethod
    def reset_instance(cls) -> None:
        """Reset the singleton and release any cached engine/session factory."""
        with cls._instance_lock:
            if cls._instance is not None:
                with cls._instance._engine_lock:
                    if cls._instance._engine is not None:
                        cls._instance._engine.dispose()
                    cls._instance._engine = None
                with cls._instance._session_lock:
                    cls._instance._session_factory = None
                cls._instance = None

    def _load_config(self) -> Mapping[str, str]:
        """Read the ``[mysql]`` section of ``db.config``.

        Falls back to ``db.config.example`` when ``db.config`` is absent.
        """
        config = configparser.ConfigParser()
        root = Path(__file__).resolve().parent.parent
        cfg_path = root / "db.config"
        if not cfg_path.exists():
            cfg_path = root / "db.config.example"
        config.read(cfg_path)
        return config["mysql"]

    def _ensure_database(self, cfg: Mapping[str, str]) -> None:
        """Create the target database if it does not already exist.

        Raises
        ------
        RuntimeError
            When the server is unreachable, credentials are wrong, or the
            user lacks privileges; the original exception is chained.
        """
        root_url = (
            f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}"
        )
        # Temporary server-level engine (no schema) used only for CREATE DATABASE.
        engine = create_engine(root_url, isolation_level="AUTOCOMMIT")
        try:
            with engine.begin() as conn:
                conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`"))
        except Exception as exc:
            logger.exception(
                "Impossibile creare il database '%s' sull'host '%s' con l'utente '%s'",
                cfg.get("database"),
                cfg.get("host"),
                cfg.get("user"),
            )
            raise RuntimeError(
                (
                    f"Impossibile creare il database '{cfg.get('database')}' "
                    f"sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. "
                    "Il server del database potrebbe non essere raggiungibile, le credenziali potrebbero essere errate "
                    "o l'utente potrebbe non avere privilegi sufficienti."
                )
            ) from exc
        finally:
            # FIX: the temporary engine was never disposed, leaking its
            # connection pool on every call; release it whether or not the
            # CREATE succeeded.
            engine.dispose()

    def get_engine(self) -> Engine:
        """Return the lazily-created application engine (thread-safe)."""
        if self._engine is None:
            with self._engine_lock:
                if self._engine is None:
                    cfg = self._load_config()
                    self._ensure_database(cfg)
                    url = (
                        f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}/{cfg['database']}"
                    )
                    self._engine = create_engine(
                        url,
                        pool_pre_ping=True,   # validate connections before use
                        pool_recycle=3600,    # avoid MySQL idle-connection timeouts
                    )
        assert self._engine is not None
        return self._engine

    def get_session(self) -> Session:
        """Return a new ORM session bound to the shared engine."""
        if self._session_factory is None:
            with self._session_lock:
                if self._session_factory is None:
                    engine = self.get_engine()
                    self._session_factory = sessionmaker(bind=engine)
        assert self._session_factory is not None
        return self._session_factory()

    def init_db(self) -> None:
        """Create all tables declared on :class:`Base`."""
        engine = self.get_engine()
        import models.orm_models  # noqa: F401  # ensure models are registered on Base
        Base.metadata.create_all(engine)


class Base(DeclarativeBase):
    """Base class for the SQLAlchemy declarative models."""
    pass
# Association table for the many-to-many relation between question sets and
# questions; the composite primary key prevents duplicate pairs.
question_set_questions = Table(
    "question_set_questions",
    Base.metadata,
    Column("set_id", String(36), ForeignKey("question_sets.id"), primary_key=True),
    Column("question_id", String(36), ForeignKey("questions.id"), primary_key=True),
)


class QuestionORM(Base):
    """A single question with its expected answer (table ``questions``)."""

    __tablename__ = "questions"

    # 36-character UUID string primary key.
    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    domanda: Mapped[str] = mapped_column(Text, nullable=False)
    risposta_attesa: Mapped[str] = mapped_column(Text, nullable=False)
    categoria: Mapped[str] = mapped_column(Text, default="")

    # Sets this question belongs to, via the association table.
    sets: Mapped[List["QuestionSetORM"]] = relationship(
        "QuestionSetORM", secondary=question_set_questions, back_populates="questions"
    )


class QuestionSetORM(Base):
    """A named collection of questions (table ``question_sets``)."""

    __tablename__ = "question_sets"

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    name: Mapped[str] = mapped_column(Text, nullable=False)

    questions: Mapped[List["QuestionORM"]] = relationship(
        "QuestionORM", secondary=question_set_questions, back_populates="sets"
    )


class TestResultORM(Base):
    """Stored outcome of one test run (table ``test_results``).

    NOTE(review): ``set_id`` is deliberately not a ForeignKey — presumably so
    results survive deletion of their set; confirm this is intended.
    """

    __tablename__ = "test_results"

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    set_id: Mapped[str] = mapped_column(String(36))
    # Timestamp kept as free-form text, not a DATETIME column.
    timestamp: Mapped[str] = mapped_column(Text)
    # Full per-question results payload stored as a JSON document.
    results: Mapped[dict] = mapped_column(JSON)


class APIPresetORM(Base):
    """Saved LLM API configuration preset (table ``api_presets``).

    NOTE(review): ``api_key`` is stored in plain text — verify this is
    acceptable for the deployment environment.
    """

    __tablename__ = "api_presets"

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    name: Mapped[str] = mapped_column(Text)
    provider_name: Mapped[str] = mapped_column(Text)
    endpoint: Mapped[str] = mapped_column(Text)
    api_key: Mapped[str] = mapped_column(Text)
    model: Mapped[str] = mapped_column(Text)
    temperature: Mapped[float] = mapped_column(Float)
    max_tokens: Mapped[int] = mapped_column(Integer)
@dataclass
class Question:
    """A question with its expected answer and optional category.

    Mirrors :class:`models.orm_models.QuestionORM`; all persistence helpers
    are static methods operating through the shared :class:`DatabaseEngine`.
    """

    id: str
    domanda: str
    risposta_attesa: str
    categoria: str = ""

    @staticmethod
    def load_all() -> List["Question"]:
        """Return every question stored in the database."""
        with DatabaseEngine.instance().get_session() as session:
            results = session.execute(select(QuestionORM)).scalars().all()
            # ``or ""`` normalizes NULL columns to empty strings.
            return [
                Question(
                    id=q.id,
                    domanda=q.domanda or "",
                    risposta_attesa=q.risposta_attesa or "",
                    categoria=q.categoria or "",
                )
                for q in results
            ]

    @staticmethod
    def add(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str:
        """Insert a new question and return its id (generated when omitted)."""
        qid = question_id or str(uuid.uuid4())
        with DatabaseEngine.instance().get_session() as session:
            session.add(
                QuestionORM(
                    id=qid,
                    domanda=domanda,
                    risposta_attesa=risposta_attesa,
                    categoria=categoria,
                )
            )
            session.commit()
        return qid

    @staticmethod
    def update(
        question_id: str,
        domanda: Optional[str] = None,
        risposta_attesa: Optional[str] = None,
        categoria: Optional[str] = None,
    ) -> bool:
        """Update an existing question; ``None`` fields are left untouched.

        Returns ``True`` on success, ``False`` when the question does not
        exist.
        """
        with DatabaseEngine.instance().get_session() as session:
            q = session.get(QuestionORM, question_id)
            if not q:
                return False
            if domanda is not None:
                q.domanda = domanda
            if risposta_attesa is not None:
                q.risposta_attesa = risposta_attesa
            if categoria is not None:
                q.categoria = categoria
            session.commit()
            return True

    @staticmethod
    def delete(question_id: str) -> None:
        """Delete a question and its set-membership rows (no-op if absent)."""
        with DatabaseEngine.instance().get_session() as session:
            # Remove association rows first to satisfy the FK constraints.
            session.execute(
                delete(question_set_questions).where(question_set_questions.c.question_id == question_id)
            )
            q = session.get(QuestionORM, question_id)
            if q:
                session.delete(q)
            session.commit()

    @staticmethod
    def _persist_entities(df: pd.DataFrame) -> Tuple[int, List[str]]:
        """Persist new questions from ``df`` while skipping duplicates.

        Parameters
        ----------
        df: DataFrame
            Normalized question data with an ``id`` column.

        Returns
        -------
        Tuple[int, list[str]]
            Number of imported questions and the list of warnings.
        """
        warnings: List[str] = []
        with DatabaseEngine.instance().get_session() as session:
            existing_ids = session.execute(select(QuestionORM.id)).scalars().all()

            df_unique = df.drop_duplicates(subset="id", keep="first")
            # FIX: the previous computation
            #   set(df["id"]) - set(df_unique["id"])
            # was always empty, because drop_duplicates(keep="first") keeps
            # one row per id so both sets are identical — the in-file
            # duplicate warning could never fire. Detect repeated ids
            # directly instead.
            ids = df["id"].astype(str)
            duplicated_ids = set(ids[ids.duplicated()])
            for dup in duplicated_ids:
                warnings.append(
                    f"Domanda con ID '{dup}' già presente nel file; saltata."
                )

            new_rows, added_count = filter_new_rows(df_unique, existing_ids)
            skipped_ids = set(df_unique["id"].astype(str)) - set(
                new_rows["id"].astype(str)
            )
            for sid in skipped_ids:
                warnings.append(
                    f"Domanda con ID '{sid}' già esistente; saltata."
                )

            if added_count > 0:
                session.bulk_insert_mappings(
                    cast(Mapper[Any], QuestionORM.__mapper__),
                    new_rows.to_dict(orient="records"),
                )
                session.commit()

        return added_count, warnings

    @staticmethod
    def filter_by_category(
        category: Optional[str] = None,
    ) -> Tuple[pd.DataFrame, List[str]]:
        """Return questions filtered by ``category`` plus all categories."""
        from utils.cache import get_questions  # local import avoids an import cycle
        df = get_questions()
        df, _, categories = format_questions_for_view(df)
        filtered_df = df[df["categoria"] == category] if category else df
        return filtered_df, categories


class QuestionImporter(ImportTemplate, ExportTemplate):
    """Question importer/exporter built on the template-method base classes."""

    def parse_file(self, file: IO[Any]) -> pd.DataFrame:  # type: ignore[override]
        """Read questions from ``file`` via ``read_questions``."""
        return read_questions(file)

    def persist_data(self, df: pd.DataFrame) -> Dict[str, Any]:  # type: ignore[override]
        """Persist parsed data through :meth:`Question._persist_entities`."""
        imported, warnings = Question._persist_entities(df)
        return {"success": True, "imported_count": imported, "warnings": warnings}

    def gather_data(self) -> pd.DataFrame:  # type: ignore[override]
        """Collect every question from the database for export."""
        questions = Question.load_all()
        return pd.DataFrame([q.__dict__ for q in questions])


# Shared module-level importer instance used by the controllers.
question_importer = QuestionImporter()
@dataclass
class PersistSetsResult:
    """Return value of the set-import pipeline (``_persist_entities``)."""

    # Refreshed sets DataFrame after the import.
    sets_df: pd.DataFrame
    # Questions DataFrame, including any questions created during the import.
    questions_df: pd.DataFrame
    sets_imported_count: int
    new_questions_added_count: int
    existing_questions_found_count: int
    warnings: List[str]


@dataclass
class QuestionSet:
    """A named set of question ids, persisted via SQLAlchemy."""

    id: str
    name: str
    questions: List[str] = field(default_factory=list)

    @staticmethod
    def load_all() -> List["QuestionSet"]:
        """Return every question set with the ids of its questions."""
        with DatabaseEngine.instance().get_session() as session:
            sets = session.execute(select(QuestionSetORM)).scalars().all()
            return [
                QuestionSet(
                    id=s.id,
                    name=s.name or "",
                    questions=[q.id for q in s.questions],
                )
                for s in sets
            ]

    @staticmethod
    def create(name: str, question_ids: Optional[List[str]] = None) -> str:
        """Create a set and return its generated id.

        Question ids that do not exist in the database are silently skipped.
        """
        set_id = str(uuid.uuid4())
        q_ids = [str(q) for q in (question_ids or [])]
        with DatabaseEngine.instance().get_session() as session:
            qs = []
            for qid in q_ids:
                q_obj = session.get(QuestionORM, qid)
                if q_obj:
                    qs.append(q_obj)
            qset = QuestionSetORM(id=set_id, name=name, questions=qs)
            session.add(qset)
            session.commit()
        return set_id

    @staticmethod
    def update(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None:
        """Rename a set and/or replace its question list.

        Silently does nothing when the set does not exist; unknown question
        ids are skipped.
        """
        with DatabaseEngine.instance().get_session() as session:
            qset = session.get(QuestionSetORM, set_id)
            if not qset:
                return
            if name is not None:
                qset.name = name
            if question_ids is not None:
                qs = []
                for qid in question_ids:
                    q_obj = session.get(QuestionORM, qid)
                    if q_obj:
                        qs.append(q_obj)
                qset.questions = qs
            session.commit()

    @staticmethod
    def delete(set_id: str) -> None:
        """Delete a set; the questions themselves are preserved."""
        with DatabaseEngine.instance().get_session() as session:
            qset = session.get(QuestionSetORM, set_id)
            if qset:
                session.delete(qset)
            session.commit()

    @staticmethod
    def _resolve_question_ids(
        questions_in_set_data: List[Any],
        current_questions: pd.DataFrame,
    ) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]:
        """Resolve question identifiers for one imported set.

        Each entry may be a bare id or a dict with full question details;
        entries carrying details for an unknown id are created on the fly.
        Returns the resolved ids, the (possibly extended) questions
        DataFrame, new/existing question counts, and warnings.
        """
        warnings: List[str] = []
        question_ids: List[str] = []
        new_added = 0
        existing_found = 0

        for q_idx, q_data in enumerate(questions_in_set_data):
            if isinstance(q_data, dict):
                q_id = str(q_data.get("id", ""))
                q_text = q_data.get("domanda", "")
                q_answer = q_data.get("risposta_attesa", "")
                q_category = q_data.get("categoria", "")
            else:
                # Bare entry: treat it as an id with no details.
                q_id = str(q_data)
                q_text = ""
                q_answer = ""
                q_category = ""

            if not q_id:
                warnings.append(f"Domanda #{q_idx + 1} senza ID (saltata).")
                continue

            if q_text and q_answer:
                # Full details present: create the question when unknown.
                if q_id in current_questions["id"].astype(str).values:
                    existing_found += 1
                    question_ids.append(q_id)
                else:
                    from controllers.question_controller import (
                        add_question_if_not_exists,
                    )
                    was_added = add_question_if_not_exists(
                        question_id=q_id,
                        domanda=q_text,
                        risposta_attesa=q_answer,
                        categoria=q_category,
                    )
                    if was_added:
                        new_added += 1
                        question_ids.append(q_id)
                        # Mirror the insert in the in-memory DataFrame so
                        # later entries of this import see the new question.
                        new_row = pd.DataFrame(
                            {
                                "id": [q_id],
                                "domanda": [q_text],
                                "risposta_attesa": [q_answer],
                                "categoria": [q_category],
                            }
                        )
                        current_questions = pd.concat(
                            [current_questions, new_row], ignore_index=True
                        )
                    else:
                        existing_found += 1
                        question_ids.append(q_id)
                continue

            # Id-only entry: the question must already exist.
            if q_id in current_questions["id"].astype(str).values:
                existing_found += 1
                question_ids.append(q_id)
            else:
                warnings.append(
                    f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata."
                )

        return question_ids, current_questions, new_added, existing_found, warnings

    @staticmethod
    def _persist_entities(
        sets_data: List[Dict[str, Any]],
        current_questions: pd.DataFrame,
        current_sets: pd.DataFrame,
    ) -> "PersistSetsResult":
        """Create question sets from parsed import data.

        Malformed entries and name collisions are skipped with a warning
        instead of failing the whole import.

        Raises
        ------
        ValueError
            If ``sets_data`` is not a list.
        """
        if not isinstance(sets_data, list):
            raise ValueError("I dati dei set devono essere una lista.")

        sets_imported_count = 0
        new_questions_added_count = 0
        existing_questions_found_count = 0
        warnings: List[str] = []

        for set_idx, set_data in enumerate(sets_data):
            if not isinstance(set_data, dict):
                warnings.append(
                    f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)."
                )
                continue

            set_name = set_data.get("name")
            questions_in_set_data = set_data.get("questions", [])

            if not set_name or not isinstance(set_name, str) or not set_name.strip():
                warnings.append(
                    f"Set #{set_idx + 1} con nome mancante o non valido (saltato)."
                )
                continue

            if not isinstance(questions_in_set_data, list):
                warnings.append(
                    f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)."
                )
                continue

            # Name collisions with existing sets are skipped, not merged.
            if set_name in current_sets.get("name", pd.Series([])).values:
                warnings.append(
                    f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati."
                )
                continue

            (
                question_ids,
                current_questions,
                added,
                existing,
                q_warnings,
            ) = QuestionSet._resolve_question_ids(
                questions_in_set_data, current_questions
            )
            warnings.extend(q_warnings)

            # An explicitly empty set is allowed; a set whose questions were
            # all invalid is not created.
            if question_ids or len(questions_in_set_data) == 0:
                try:
                    QuestionSet.create(set_name, question_ids)
                    sets_imported_count += 1
                except Exception as e:  # pragma: no cover - protective
                    warnings.append(
                        f"Errore durante la creazione del set '{set_name}': {e}"
                    )
            else:
                warnings.append(
                    f"Il set '{set_name}' non è stato creato perché non conteneva domande valide."
                )

            new_questions_added_count += added
            existing_questions_found_count += existing

        # Local import avoids an import cycle with utils.cache.
        from utils.cache import refresh_question_sets as _refresh_question_sets
        sets_df = _refresh_question_sets()

        return PersistSetsResult(
            sets_df=sets_df,
            questions_df=current_questions,
            sets_imported_count=sets_imported_count,
            new_questions_added_count=new_questions_added_count,
            existing_questions_found_count=existing_questions_found_count,
            warnings=warnings,
        )

    @staticmethod
    def import_from_file(uploaded_file: IO[str] | IO[bytes]) -> "PersistSetsResult":
        """Deprecated wrapper kept for backward compatibility.

        Use :class:`QuestionSetImporter` for new imports.
        """
        if uploaded_file is None:
            raise ValueError("Nessun file fornito per l'importazione.")

        import warnings

        warnings.warn(
            "QuestionSet.import_from_file è deprecato; usa QuestionSetImporter.import_from_file",
            DeprecationWarning,
            stacklevel=2,
        )

        return question_set_importer.import_from_file(uploaded_file)


class QuestionSetImporter(ImportTemplate, ExportTemplate):
    """Question-set importer/exporter built on the template-method base classes."""

    def parse_file(self, file: IO[Any]) -> List[Dict[str, Any]]:  # type: ignore[override]
        """Read question sets from ``file`` via ``read_question_sets``."""
        return read_question_sets(file)

    def persist_data(self, parsed: List[Dict[str, Any]]) -> PersistSetsResult:  # type: ignore[override]
        """Persist parsed sets through :meth:`QuestionSet._persist_entities`."""
        # Local imports avoid circular dependencies with the controllers.
        from controllers.question_controller import load_questions
        from controllers.question_set_controller import load_sets

        current_questions = load_questions()
        current_sets = load_sets()

        return QuestionSet._persist_entities(parsed, current_questions, current_sets)

    def gather_data(self) -> List[Dict[str, Any]]:  # type: ignore[override]
        """Collect every set with full question details for export."""
        from models.question import Question

        sets = QuestionSet.load_all()
        questions = {
            q.id: {"id": q.id, "domanda": q.domanda, "risposta_attesa": q.risposta_attesa, "categoria": q.categoria}
            for q in Question.load_all()
        }
        data: List[Dict[str, Any]] = []
        for s in sets:
            # Unknown ids degrade to a stub containing only the id.
            q_list = [questions.get(qid, {"id": qid}) for qid in s.questions]
            data.append({"name": s.name, "questions": q_list})
        return data


# Shared module-level importer instance used by the controllers.
question_set_importer = QuestionSetImporter()
@dataclass
class TestResult:
    """One stored test run: the evaluated set plus its per-question results."""

    id: str
    set_id: str
    timestamp: str
    results: dict[str, Any]
    # Tell pytest not to collect this class as a test case.
    __test__ = False

    @staticmethod
    def load_all() -> List["TestResult"]:
        """Return every stored test result."""
        with DatabaseEngine.instance().get_session() as session:
            results = session.execute(select(TestResultORM)).scalars().all()
            return [
                TestResult(
                    id=cast(str, r.id),
                    set_id=cast(str, r.set_id),
                    timestamp=cast(str, r.timestamp),
                    results=cast(dict[str, Any], r.results or {}),
                )
                for r in results
            ]

    @staticmethod
    @lru_cache(maxsize=1)
    def load_all_df() -> pd.DataFrame:
        """Return all results as a cached pandas DataFrame.

        NOTE(review): the cached DataFrame object is shared across callers;
        mutating it in place would corrupt the cache — confirm that callers
        only read it.
        """
        data = [asdict(r) for r in TestResult.load_all()]
        columns = ["id", "set_id", "timestamp", "results"]
        return pd.DataFrame(data, columns=columns)

    @staticmethod
    def refresh_cache() -> pd.DataFrame:
        """Clear and rebuild the cached results DataFrame."""
        TestResult.load_all_df.cache_clear()
        return TestResult.load_all_df()

    @staticmethod
    def _persist_entities(imported_df: pd.DataFrame) -> int:
        """Persist new test results, skipping ids that already exist.

        Parameters
        ----------
        imported_df: DataFrame
            Normalized result rows with an ``id`` column.

        Returns
        -------
        int
            Number of newly inserted results.
        """
        existing_df = TestResult.load_all_df()
        existing_ids = (
            existing_df["id"].astype(str).tolist() if not existing_df.empty else []
        )
        new_rows, added_count = filter_new_rows(imported_df, existing_ids)

        if added_count > 0:
            # save() rewrites the full result list, so merge old and new rows.
            combined_df = pd.concat([existing_df, new_rows], ignore_index=True)
            results = [
                TestResult(**row) for row in combined_df.to_dict(orient="records")
            ]
            TestResult.save(results)
        return added_count

    @staticmethod
    def save(results: List["TestResult"]) -> None:
        """Persist ``results`` as the complete result list.

        Rows present in the database but absent from ``results`` are DELETED;
        matching ids are updated in place, new ids are inserted.
        """
        with DatabaseEngine.instance().get_session() as session:
            existing_ids = session.execute(select(TestResultORM.id)).scalars().all()
            incoming_ids = [r.id for r in results]

            # Remove rows that are no longer part of the incoming list.
            for rid in set(existing_ids) - set(incoming_ids):
                obj = session.get(TestResultORM, rid)
                if obj:
                    session.delete(obj)

            for result in results:
                obj = session.get(TestResultORM, result.id)
                if obj:
                    obj_cast = cast(Any, obj)
                    obj_cast.set_id = result.set_id
                    obj_cast.timestamp = result.timestamp
                    obj_cast.results = result.results
                else:
                    session.add(TestResultORM(**asdict(result)))
            session.commit()

    @staticmethod
    def add(set_id: str, results_data: dict[str, Any]) -> str:
        """Insert a single result row and return its generated id."""
        result_id = str(uuid.uuid4())
        with DatabaseEngine.instance().get_session() as session:
            session.add(
                TestResultORM(
                    id=result_id,
                    set_id=set_id,
                    # Timestamp is lifted out of the payload for querying.
                    timestamp=results_data.get('timestamp', ''),
                    results=results_data,
                )
            )
            session.commit()
        return result_id

    @staticmethod
    def add_and_refresh(set_id: str, results_data: dict[str, Any]) -> str:
        """Save a single result and refresh the cached DataFrame."""
        rid = TestResult.add(set_id, results_data)
        TestResult.refresh_cache()
        return rid

    @staticmethod
    def calculate_statistics(
        questions_results: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Aggregate per-question evaluations into summary statistics.

        Returns the average score, one score per question, and the mean of
        the similarity/correctness/completeness metrics for radar charts.
        All values are 0 for an empty input.
        """
        if not questions_results:
            return {
                "avg_score": 0,
                "per_question_scores": [],
                "radar_metrics": {
                    "similarity": 0,
                    "correctness": 0,
                    "completeness": 0,
                },
            }

        per_question_scores: List[Dict[str, Any]] = []
        radar_sums = {"similarity": 0, "correctness": 0, "completeness": 0}

        for qdata in questions_results.values():
            evaluation = qdata.get("evaluation", {})
            score = evaluation.get("score", 0)
            per_question_scores.append(
                {"question": qdata.get("question", "Domanda"), "score": score}
            )
            # Missing metrics count as 0 toward the radar averages.
            for metric in radar_sums.keys():
                radar_sums[metric] += evaluation.get(metric, 0)

        count = len(per_question_scores)
        avg_score = (
            sum(item["score"] for item in per_question_scores) / count if count > 0 else 0
        )
        radar_metrics = {
            metric: radar_sums[metric] / count if count > 0 else 0
            for metric in radar_sums
        }
        return {
            "avg_score": avg_score,
            "per_question_scores": per_question_scores,
            "radar_metrics": radar_metrics,
        }


class TestResultImporter(ImportTemplate, ExportTemplate):
    """Test-result importer/exporter built on the template-method base classes."""

    def parse_file(self, file: IO[Any]) -> pd.DataFrame:  # type: ignore[override]
        """Read results from ``file`` via ``read_test_results``."""
        return read_test_results(file)

    def persist_data(self, df: pd.DataFrame) -> Dict[str, Any]:  # type: ignore[override]
        """Persist parsed rows through :meth:`TestResult._persist_entities`."""
        added_count = TestResult._persist_entities(df)
        if added_count > 0:
            TestResult.refresh_cache()
        message = (
            f"Importati {added_count} risultati."
            if added_count > 0
            else "Nessun nuovo risultato importato."
        )
        return {"success": True, "imported_count": added_count, "message": message}

    def gather_data(self) -> pd.DataFrame:  # type: ignore[override]
        """Collect every stored result for export."""
        return TestResult.load_all_df()


# Shared module-level importer instance used by the controllers.
test_result_importer = TestResultImporter()
sessionmaker(bind=engine) # type: ignore[attr-defined] + yield db + # Reimposta dopo il test + DatabaseEngine.reset_instance() + diff --git a/tests/sample_data/question_sets.csv b/tests/sample_data/question_sets.csv new file mode 100644 index 0000000..beb1afa --- /dev/null +++ b/tests/sample_data/question_sets.csv @@ -0,0 +1,5 @@ +name,id,domanda,risposta_attesa,categoria +Set1,q1,Existing question?,Answer1,cat1 +Set2,q1,,, +Set2,q2,New question?,Answer2,cat2 +Set2,,No ID question?,No ID answer,cat3 diff --git a/tests/sample_data/question_sets.json b/tests/sample_data/question_sets.json new file mode 100644 index 0000000..715d273 --- /dev/null +++ b/tests/sample_data/question_sets.json @@ -0,0 +1,8 @@ +[ + {"name": "Set1", "questions": []}, + {"name": "Set2", "questions": [ + {"id": "q1"}, + {"id": "q2", "domanda": "New question?", "risposta_attesa": "Answer2", "categoria": "cat2"}, + {"domanda": "No ID question?", "risposta_attesa": "No ID answer", "categoria": "cat3"} + ]} +] diff --git a/tests/sample_data/questions.csv b/tests/sample_data/questions.csv new file mode 100644 index 0000000..e0425fa --- /dev/null +++ b/tests/sample_data/questions.csv @@ -0,0 +1,4 @@ +id,domanda,risposta_attesa,categoria +q1,Existing question?,Answer1,cat1 +q2,New question?,Answer2,cat2 +q2,Duplicate question?,Answer3,cat3 diff --git a/tests/sample_data/questions.json b/tests/sample_data/questions.json new file mode 100644 index 0000000..3a20528 --- /dev/null +++ b/tests/sample_data/questions.json @@ -0,0 +1,5 @@ +[ + {"id": "q1", "domanda": "Existing question?", "risposta_attesa": "Answer1", "categoria": "cat1"}, + {"id": "q2", "domanda": "New question?", "risposta_attesa": "Answer2", "categoria": "cat2"}, + {"id": "q2", "domanda": "Duplicate question?", "risposta_attesa": "Answer3", "categoria": "cat3"} +] diff --git a/tests/sample_data/test_results.csv b/tests/sample_data/test_results.csv new file mode 100644 index 0000000..80866f3 --- /dev/null +++ 
b/tests/sample_data/test_results.csv @@ -0,0 +1,3 @@ +id,set_id,timestamp,results +1,s1,2023-01-01,{} +2,s2,2023-01-02,{} diff --git a/tests/sample_data/test_results.json b/tests/sample_data/test_results.json new file mode 100644 index 0000000..0005229 --- /dev/null +++ b/tests/sample_data/test_results.json @@ -0,0 +1,4 @@ +[ + {"id": "1", "set_id": "s1", "timestamp": "2023-01-01", "results": {}}, + {"id": "2", "set_id": "s2", "timestamp": "2023-01-02", "results": {}} +] diff --git a/tests/test_api_configurazione_view.py b/tests/test_api_configurazione_view.py new file mode 100644 index 0000000..aa97621 --- /dev/null +++ b/tests/test_api_configurazione_view.py @@ -0,0 +1,160 @@ +import os +import sys +import importlib +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + + +@pytest.fixture +def view(monkeypatch): + import controllers + + monkeypatch.setattr(controllers, "load_presets", lambda: pd.DataFrame()) + monkeypatch.setattr(controllers, "list_presets", lambda *_: []) + + module = importlib.reload(importlib.import_module("views.api_configurazione")) + return module + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.errors = [] + self.successes = [] + + def error(self, msg): + self.errors.append(msg) + + def success(self, msg): + self.successes.append(msg) + + +def test_start_new_preset_edit_initializes_session_state(monkeypatch, view): + dummy = DummySt() + monkeypatch.setattr(view, "st", dummy) + + view.start_new_preset_edit() + + assert dummy.session_state.editing_preset is True + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == { + "name": "", + "endpoint": DEFAULT_ENDPOINT, + "api_key": "", + 
"model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000, + } + + +def test_start_existing_preset_edit_initializes_session_state(monkeypatch, mocker, view): + dummy = DummySt() + dummy.session_state.api_presets = object() + monkeypatch.setattr(view, "st", dummy) + + preset = { + "name": "Existing", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": "0.2", + "max_tokens": "200", + } + mocker.patch("views.api_configurazione.get_preset_by_id", return_value=preset) + view.start_existing_preset_edit("123") + + assert dummy.session_state.editing_preset is True + assert dummy.session_state.current_preset_edit_id == "123" + assert dummy.session_state.preset_form_data == { + "name": "Existing", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.2, + "max_tokens": 200, + } + assert dummy.errors == [] + + +def test_save_preset_from_form_validation_error(monkeypatch, mocker, view): + dummy = DummySt() + dummy.session_state.preset_form_data = {} + dummy.session_state.current_preset_edit_id = None + monkeypatch.setattr(view, "st", dummy) + + mocker.patch("views.api_configurazione.validate_preset", return_value=(False, "err")) + mock_save = mocker.patch("views.api_configurazione.save_preset") + view.save_preset_from_form() + mock_save.assert_not_called() + + assert dummy.errors == ["err"] + + +def test_save_preset_from_form_success(monkeypatch, mocker, view): + dummy = DummySt() + dummy.session_state.update( + { + "preset_form_data": {}, + "current_preset_edit_id": "1", + "editing_preset": True, + "preset_name": "Name", + "preset_endpoint": "e", + "preset_api_key": "k", + "preset_model": "m", + "preset_temperature": 0.2, + "preset_max_tokens": 200, + } + ) + monkeypatch.setattr(view, "st", dummy) + + updated_df = pd.DataFrame([{"id": "1"}]) + mocker.patch("views.api_configurazione.validate_preset", return_value=(True, "")) + mocker.patch( + "views.api_configurazione.save_preset", + return_value=(True, "saved", updated_df), + ) + 
view.save_preset_from_form() + + assert dummy.session_state.api_presets is updated_df + assert dummy.successes == ["saved"] + assert dummy.session_state.editing_preset is False + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == {} + + +def test_delete_preset_callback_clears_form_state(monkeypatch, mocker, view): + dummy = DummySt() + dummy.session_state.update( + { + "api_presets": pd.DataFrame([{"id": "2"}]), + "editing_preset": True, + "current_preset_edit_id": "2", + "preset_form_data": {"name": "Old"}, + } + ) + monkeypatch.setattr(view, "st", dummy) + + updated_df = pd.DataFrame([]) + mocker.patch( + "views.api_configurazione.delete_preset", + return_value=(True, "deleted", updated_df), + ) + view.delete_preset_callback("2") + + assert dummy.session_state.api_presets is updated_df + assert dummy.successes == ["deleted"] + assert dummy.session_state.editing_preset is False + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == {} diff --git a/tests/test_api_preset_controller.py b/tests/test_api_preset_controller.py new file mode 100644 index 0000000..d2b9a24 --- /dev/null +++ b/tests/test_api_preset_controller.py @@ -0,0 +1,128 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import api_preset_controller as controller # noqa: E402 + + +def test_validate_preset_empty_name(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + ok, msg = controller.validate_preset({"name": ""}) + assert ok is False + assert "non può essere vuoto" in msg + mock_load.assert_not_called() + + +def test_validate_preset_duplicate(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) + ok, msg = controller.validate_preset({"name": "A"}) + assert ok is False + assert 
"esiste già" in msg + + +def test_validate_preset_ok(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) + ok, msg = controller.validate_preset({"name": "B"}) + assert ok is True + assert msg == "" + + +def test_save_preset_new(mocker): + mock_uuid = mocker.patch( + "controllers.api_preset_controller.uuid.uuid4", return_value="new-id" + ) + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_save = mocker.patch("controllers.api_preset_controller.APIPreset.save") + mock_refresh = mocker.patch( + "controllers.api_preset_controller.refresh_api_presets" + ) + + df = pd.DataFrame( + [ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ] + ) + mock_load.return_value = df + updated_df = pd.DataFrame([]) + mock_refresh.return_value = updated_df + + ok, msg, returned_df = controller.save_preset( + { + "name": "New", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.1, + "max_tokens": 50, + } + ) + + assert ok is True + assert "creato" in msg + assert returned_df is updated_df + mock_save.assert_called_once() + saved_presets = mock_save.call_args[0][0] + assert any(p.id == "new-id" for p in saved_presets) + assert any(p.name == "New" for p in saved_presets) + + +def test_delete_preset(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_delete = mocker.patch("controllers.api_preset_controller.APIPreset.delete") + mock_refresh = mocker.patch( + "controllers.api_preset_controller.refresh_api_presets" + ) + + df = pd.DataFrame( + [ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ] + ) + mock_load.return_value = df + updated_df = pd.DataFrame([]) + mock_refresh.return_value = updated_df + + ok, msg, returned_df = 
controller.delete_preset("1") + assert ok is True + assert "eliminato" in msg + assert returned_df is updated_df + mock_delete.assert_called_once_with("1") + + +def test_test_api_connection_delegates(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() + mock_get_client.return_value = mock_client + mock_choice = mocker.Mock() + mock_choice.message = mocker.Mock() + mock_choice.message.content = "Connessione riuscita." + mock_resp = mocker.Mock() + mock_resp.choices = [mock_choice] + mock_client.chat.completions.create.return_value = mock_resp + + ok, msg = controller.test_api_connection("k", "e", "m", 0.1, 10) + + assert ok is True + assert "riuscita" in msg.lower() + mock_get_client.assert_called_once_with(api_key="k", base_url="e") + diff --git a/tests/test_api_preset_model.py b/tests/test_api_preset_model.py new file mode 100644 index 0000000..8874497 --- /dev/null +++ b/tests/test_api_preset_model.py @@ -0,0 +1,133 @@ +import os +import sys +from types import SimpleNamespace + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import StaticPool + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import models.api_preset as api_preset_module +from models.api_preset import APIPreset +from models.orm_models import APIPresetORM +from models.database import Base + + +@pytest.fixture +def session_factory(monkeypatch): + engine = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(engine) + SessionLocal = sessionmaker(bind=engine) + + dummy_engine = SimpleNamespace(get_session=lambda: SessionLocal()) + monkeypatch.setattr(api_preset_module.DatabaseEngine, "instance", lambda: dummy_engine) + return SessionLocal + + +def test_load_all_returns_correct_attributes(session_factory): + session = session_factory() + session.add( + APIPresetORM( + id="1", 
+ name="Preset", + provider_name="OpenAI", + endpoint="http://api", + api_key="key", + model="gpt", + temperature=0.7, + max_tokens=1000, + ) + ) + session.commit() + session.close() + + presets = APIPreset.load_all() + assert len(presets) == 1 + preset = presets[0] + assert preset.id == "1" + assert preset.name == "Preset" + assert preset.provider_name == "OpenAI" + assert preset.endpoint == "http://api" + assert preset.api_key == "key" + assert preset.model == "gpt" + assert preset.temperature == 0.7 + assert preset.max_tokens == 1000 + + +def test_save_inserts_and_updates(session_factory): + APIPreset.save([ + APIPreset( + id="1", + name="Initial", + provider_name="P", + endpoint="E", + api_key="K", + model="M", + temperature=0.1, + max_tokens=50, + ) + ]) + + session = session_factory() + row = session.get(APIPresetORM, "1") + assert row.name == "Initial" + assert row.max_tokens == 50 + session.close() + + APIPreset.save([ + APIPreset( + id="1", + name="Updated", + provider_name="P2", + endpoint="E2", + api_key="K2", + model="M2", + temperature=0.2, + max_tokens=150, + ) + ]) + + session = session_factory() + row = session.get(APIPresetORM, "1") + assert row.name == "Updated" + assert row.provider_name == "P2" + assert row.endpoint == "E2" + assert row.api_key == "K2" + assert row.model == "M2" + assert row.temperature == 0.2 + assert row.max_tokens == 150 + session.close() + + +def test_delete_existing_and_non_existing(session_factory): + session = session_factory() + session.add( + APIPresetORM( + id="1", + name="Preset", + provider_name="P", + endpoint="E", + api_key="K", + model="M", + temperature=0.1, + max_tokens=10, + ) + ) + session.commit() + session.close() + + APIPreset.delete("1") + session = session_factory() + assert session.get(APIPresetORM, "1") is None + session.close() + + APIPreset.delete("nonexistent") + session = session_factory() + assert session.query(APIPresetORM).count() == 0 + session.close() diff --git a/tests/test_app.py 
b/tests/test_app.py new file mode 100644 index 0000000..36a4b29 --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,97 @@ +import importlib +import sys +import types +from pathlib import Path + + +def test_app_page_config_and_navigation(monkeypatch): + """Test di base per la configurazione dell'app Streamlit e l'impostazione della navigazione.""" + # Registra le chiamate all'API di Streamlit + page_config = {} + radio_call = {} + + def fake_set_page_config(**kwargs): + page_config.update(kwargs) + + def fake_radio(label, options): + radio_call["label"] = label + radio_call["options"] = options + return options[0] + + fake_sidebar = types.SimpleNamespace(radio=fake_radio) + + def fake_page(_path, title, **_k): + return types.SimpleNamespace(title=title) + + def fake_navigation(pages): + options = [p.title for p in pages] + fake_sidebar.radio("Navigazione", options) + return types.SimpleNamespace(run=lambda: None) + + fake_st = types.SimpleNamespace( + set_page_config=fake_set_page_config, + sidebar=fake_sidebar, + title=lambda *a, **k: None, + Page=fake_page, + navigation=fake_navigation, + ) + + monkeypatch.setitem(sys.modules, "streamlit", fake_st) + + # Assicura che la radice del repository sia importabile + project_root = Path(__file__).resolve().parent.parent + sys.path.insert(0, str(project_root)) + + # Crea moduli di vista fittizi richiesti da app.py + views_pkg = types.ModuleType("views") + views_pkg.__path__ = [] # contrassegna come pacchetto + view_names = [ + "api_configurazione", + "esecuzione_test", + "gestione_domande", + "gestione_set", + "home", + "visualizza_risultati", + ] + for name in view_names: + mod = types.ModuleType(f"views.{name}") + mod.render = lambda: None + sys.modules[f"views.{name}"] = mod + setattr(views_pkg, name, mod) + + views_pkg.page_registry = { + "Home": views_pkg.home.render, + "Configurazione API": views_pkg.api_configurazione.render, + "Gestione Domande": views_pkg.gestione_domande.render, + "Gestione Set di Domande": 
views_pkg.gestione_set.render, + "Esecuzione Test": views_pkg.esecuzione_test.render, + "Visualizzazione Risultati": views_pkg.visualizza_risultati.render, + } + + session_state_mod = types.ModuleType("views.session_state") + session_state_mod.initialize_session_state = lambda: None + sys.modules["views.session_state"] = session_state_mod + + style_utils_mod = types.ModuleType("views.style_utils") + style_utils_mod.add_global_styles = lambda: None + style_utils_mod.load_css = lambda: None + sys.modules["views.style_utils"] = style_utils_mod + + sys.modules["views"] = views_pkg + + # Assicura un'importazione pulita di app + monkeypatch.delitem(sys.modules, "app", raising=False) + app = importlib.import_module("app") + + assert page_config["page_title"] == "LLM Test Evaluation Platform" + assert radio_call["label"] == "Navigazione" + expected_pages = [ + "Home", + "Configurazione API", + "Gestione Domande", + "Gestione Set di Domande", + "Esecuzione Test", + "Visualizzazione Risultati", + ] + assert radio_call["options"] == expected_pages + diff --git a/tests/test_cache_utils.py b/tests/test_cache_utils.py new file mode 100644 index 0000000..fc6758d --- /dev/null +++ b/tests/test_cache_utils.py @@ -0,0 +1,170 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from utils.cache import ( # noqa: E402 + get_questions, + refresh_questions, + get_question_sets, + refresh_question_sets, + get_api_presets, + refresh_api_presets, + get_results, + refresh_results, +) +from models.question import Question # noqa: E402 +from models.question_set import QuestionSet # noqa: E402 +from models.api_preset import APIPreset # noqa: E402 +from models.test_result import TestResult # noqa: E402 + + +def test_get_questions_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + Question(id="1", domanda="Q1", risposta_attesa="A1", categoria="C1") + ] + + 
monkeypatch.setattr(Question, "load_all", staticmethod(fake_load_all)) + get_questions.cache_clear() + + df1 = get_questions() + assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_questions() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + Question(id="2", domanda="Q2", risposta_attesa="A2", categoria="C2") + ] + + monkeypatch.setattr(Question, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_questions() + assert call_count["count"] == 2 + assert list(df3["id"]) == ["2"] + + df4 = get_questions() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_question_sets_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + QuestionSet(id="1", name="S1", questions=["q1"]) + ] + + monkeypatch.setattr(QuestionSet, "load_all", staticmethod(fake_load_all)) + get_question_sets.cache_clear() + + df1 = get_question_sets() + assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_question_sets() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + QuestionSet(id="2", name="S2", questions=["q2"]) + ] + + monkeypatch.setattr(QuestionSet, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_question_sets() + assert call_count["count"] == 2 + assert list(df3["id"]) == ["2"] + + df4 = get_question_sets() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_api_presets_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + APIPreset( + id="1", + name="P1", + provider_name="prov", + endpoint="e1", + api_key="k1", + model="m1", + temperature=0.5, + max_tokens=10, + ) + ] + + monkeypatch.setattr(APIPreset, "load_all", staticmethod(fake_load_all)) + get_api_presets.cache_clear() + + df1 = get_api_presets() 
+ assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_api_presets() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + APIPreset( + id="2", + name="P2", + provider_name="prov2", + endpoint="e2", + api_key="k2", + model="m2", + temperature=0.7, + max_tokens=20, + ) + ] + + monkeypatch.setattr(APIPreset, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_api_presets() + assert call_count["count"] == 2 + assert list(df3["id"]) == ["2"] + + df4 = get_api_presets() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_and_refresh_results(monkeypatch): + load_called = {"count": 0} + refresh_called = {"count": 0} + df1 = pd.DataFrame([{"id": "1", "set_id": "s1", "timestamp": "t1", "results": {}}]) + df2 = pd.DataFrame([{"id": "2", "set_id": "s2", "timestamp": "t2", "results": {}}]) + + def fake_load_all_df(): + load_called["count"] += 1 + return df1 + + def fake_refresh_cache(): + refresh_called["count"] += 1 + return df2 + + monkeypatch.setattr(TestResult, "load_all_df", staticmethod(fake_load_all_df)) + monkeypatch.setattr(TestResult, "refresh_cache", staticmethod(fake_refresh_cache)) + + assert get_results().equals(df1) + assert load_called["count"] == 1 + + assert refresh_results().equals(df2) + assert refresh_called["count"] == 1 diff --git a/tests/test_component_utils_view.py b/tests/test_component_utils_view.py new file mode 100644 index 0000000..394da4b --- /dev/null +++ b/tests/test_component_utils_view.py @@ -0,0 +1,39 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import component_utils + + +class DummySt: + def __init__(self): + self.calls = [] + + def markdown(self, text, **kwargs): + self.calls.append(text) + + +def test_create_card_renders_expected_html(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(component_utils, 'st', dummy_st) + + 
component_utils.create_card('Titolo', 'Contenuto', icon='⭐', is_success=True) + + assert any('Titolo' in c and 'Contenuto' in c and '⭐' in c for c in dummy_st.calls) + # success card should have specific background color + assert any('#f8fff9' in c for c in dummy_st.calls) + + +def test_create_metrics_container_renders_metrics(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(component_utils, 'st', dummy_st) + + metrics = [{'label': 'Accuracy', 'value': 95, 'unit': '%', 'icon': '📈'}] + component_utils.create_metrics_container(metrics) + + # first call is CSS, second call is metrics HTML + assert len(dummy_st.calls) >= 2 + metrics_html = dummy_st.calls[-1] + assert 'Accuracy' in metrics_html + assert '95' in metrics_html + assert '📈' in metrics_html diff --git a/tests/test_data_format_utils.py b/tests/test_data_format_utils.py new file mode 100644 index 0000000..febb97c --- /dev/null +++ b/tests/test_data_format_utils.py @@ -0,0 +1,39 @@ +import pandas as pd + +from utils.data_format_utils import build_questions_detail, format_questions_for_view + + +def test_format_questions_for_view_no_category(): + df = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + } + ) + norm_df, question_map, categories = format_questions_for_view(df) + + assert "categoria" in norm_df.columns + assert norm_df.iloc[0]["categoria"] == "N/A" + assert categories == ["N/A"] + assert question_map == {"1": {"domanda": "d1", "categoria": "N/A"}} + + +def test_format_questions_for_view_empty_df(): + df = pd.DataFrame() + norm_df, question_map, categories = format_questions_for_view(df) + + assert list(norm_df.columns) == ["id", "domanda", "risposta_attesa", "categoria"] + assert norm_df.empty + assert question_map == {} + assert categories == [] + + +def test_build_questions_detail(): + question_map = {"1": {"domanda": "d1", "categoria": "A"}} + details = build_questions_detail(question_map, ["1", "2"]) + assert details == [ + {"id": "1", "domanda": "d1", 
"categoria": "A"}, + {"id": "2", "domanda": "", "categoria": "N/A"}, + ] + assert build_questions_detail(question_map, "notalist") == [] diff --git a/tests/test_esecuzione_test_view.py b/tests/test_esecuzione_test_view.py new file mode 100644 index 0000000..ce791a5 --- /dev/null +++ b/tests/test_esecuzione_test_view.py @@ -0,0 +1,85 @@ +import pytest + +import importlib +import os +import sys +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + + +@pytest.fixture +def esecuzione_test(monkeypatch): + import controllers + import importlib.util + from pathlib import Path + + monkeypatch.setattr( + controllers, + "load_presets", + lambda: pd.DataFrame([{"name": "p"}]), + ) + monkeypatch.setattr( + controllers, + "load_sets", + lambda: pd.DataFrame([{"id": 1, "name": "s", "questions": [1]}]), + ) + + import streamlit as st + + class StopRender(Exception): + pass + + monkeypatch.setattr(st, "stop", lambda: (_ for _ in ()).throw(StopRender())) + + base_path = Path(__file__).resolve().parents[1] / "views" + style_spec = importlib.util.spec_from_file_location("views.style_utils", base_path / "style_utils.py") + style_mod = importlib.util.module_from_spec(style_spec) + sys.modules["views.style_utils"] = style_mod + style_spec.loader.exec_module(style_mod) + + module_path = base_path / "esecuzione_test.py" + spec = importlib.util.spec_from_file_location("views.esecuzione_test", module_path) + module = importlib.util.module_from_spec(spec) + sys.modules["views.esecuzione_test"] = module + try: + spec.loader.exec_module(module) + except StopRender: + pass + return module + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + + +def test_set_llm_mode_callback(monkeypatch, esecuzione_test): + dummy_st = DummySt() + dummy_st.session_state.test_mode = 
"Manual" + dummy_st.session_state.mode_changed = False + monkeypatch.setattr(esecuzione_test, "st", dummy_st) + + esecuzione_test.set_llm_mode_callback() + + assert dummy_st.session_state.test_mode == "Valutazione Automatica con LLM" + assert dummy_st.session_state.mode_changed is True + + +def test_run_llm_test_callback(monkeypatch, esecuzione_test): + dummy_st = DummySt() + dummy_st.session_state.run_llm_test = False + monkeypatch.setattr(esecuzione_test, "st", dummy_st) + + esecuzione_test.run_llm_test_callback() + + assert dummy_st.session_state.run_llm_test is True diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py new file mode 100644 index 0000000..3adcbfe --- /dev/null +++ b/tests/test_evaluate_answer.py @@ -0,0 +1,91 @@ +import json +import logging +import os +import sys + +import pytest + +from utils.openai_client import ClientCreationError + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers.test_controller import evaluate_answer # noqa: E402 + + +def _mock_response(mocker, content: str): + mock_resp = mocker.Mock() + mock_choice = mocker.Mock() + mock_choice.message = mocker.Mock() + mock_choice.message.content = content + mock_resp.choices = [mock_choice] + return mock_resp + + +def _mock_response_no_choices(mocker): + mock_resp = mocker.Mock() + mock_resp.choices = [] + return mock_resp + + +def test_evaluate_answer_success(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() + mock_get_client.return_value = mock_client + + evaluation = { + "score": 90, + "explanation": "good", + "similarity": 90, + "correctness": 90, + "completeness": 90, + } + mock_client.chat.completions.create.return_value = _mock_response( + mocker, json.dumps(evaluation) + ) + + result = evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert result["score"] == 90 + assert result["similarity"] == 90 + + +def 
test_evaluate_answer_no_client(mocker): + mocker.patch( + "utils.openai_client.get_openai_client", + side_effect=ClientCreationError("boom"), + ) + with pytest.raises(ValueError): + evaluate_answer( + "q", "expected", "actual", {"api_key": None} + ) + + +def test_evaluate_answer_json_decode_error(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response( + mocker, "not json" + ) + + with pytest.raises(ValueError): + evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + +def test_evaluate_answer_no_choices(mocker, caplog): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response_no_choices(mocker) + + with caplog.at_level(logging.ERROR): + with pytest.raises(RuntimeError): + evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert "choices" in caplog.text diff --git a/tests/test_file_reader_utils.py b/tests/test_file_reader_utils.py new file mode 100644 index 0000000..c4784a8 --- /dev/null +++ b/tests/test_file_reader_utils.py @@ -0,0 +1,90 @@ +import json +import pandas as pd +import pytest + +from utils.file_reader_utils import ( + read_questions, + read_question_sets, + read_test_results, + filter_new_rows, +) + + +def test_read_questions_csv(tmp_path): + file = tmp_path / "questions.csv" + file.write_text("domanda,risposta_attesa\nq1,a1\n") + with file.open("r") as f: + df = read_questions(f) + assert list(df.columns) == ["id", "domanda", "risposta_attesa", "categoria"] + assert df.iloc[0]["domanda"] == "q1" + + +def test_read_questions_json(tmp_path): + content = [{"domanda": "q1", "risposta_attesa": "a1"}] + file = tmp_path / "questions.json" + file.write_text(json.dumps(content)) + with 
file.open("r") as f: + df = read_questions(f) + assert df.iloc[0]["risposta_attesa"] == "a1" + + +def test_read_questions_missing_column(tmp_path): + file = tmp_path / "bad_questions.csv" + file.write_text("domanda\nq1\n") + with file.open("r") as f: + with pytest.raises(ValueError): + read_questions(f) + + +def test_read_question_sets_csv(tmp_path): + file = tmp_path / "sets.csv" + file.write_text( + "name,id,domanda,risposta_attesa,categoria\ns1,1,q1,a1,c1\n" + ) + with file.open("r") as f: + sets = read_question_sets(f) + assert sets == [ + { + "name": "s1", + "questions": [ + { + "id": "1", + "domanda": "q1", + "risposta_attesa": "a1", + "categoria": "c1", + } + ], + } + ] + + +def test_read_question_sets_missing_columns(tmp_path): + file = tmp_path / "bad_sets.csv" + file.write_text("name,id,domanda\ns1,1,q1\n") + with file.open("r") as f: + with pytest.raises(ValueError): + read_question_sets(f) + + +def test_read_test_results_csv(tmp_path): + file = tmp_path / "results.csv" + file.write_text("id,set_id,timestamp,results\n1,s1,2024-01-01,{}\n") + with file.open("r") as f: + df = read_test_results(f) + assert df.iloc[0]["set_id"] == "s1" + assert df.iloc[0]["results"] == {} + + +def test_read_test_results_invalid_json(tmp_path): + file = tmp_path / "bad_results.json" + file.write_text("{invalid json") + with file.open("r") as f: + with pytest.raises(ValueError): + read_test_results(f) + + +def test_filter_new_rows_duplicates(): + df = pd.DataFrame({"id": ["a", "b", "b", "c"]}) + filtered, count = filter_new_rows(df, ["b", "d"]) + assert list(filtered["id"]) == ["a", "c"] + assert count == 2 diff --git a/tests/test_file_writer_utils.py b/tests/test_file_writer_utils.py new file mode 100644 index 0000000..db5a08d --- /dev/null +++ b/tests/test_file_writer_utils.py @@ -0,0 +1,22 @@ +import json +import os +import pandas as pd +from utils.file_writer_utils import write_dataset + + +def test_write_dataset_csv(tmp_path): + df = pd.DataFrame([{"a": 1, "b": 2}]) + 
def test_write_dataset_json(tmp_path):
    # A plain list of dicts round-trips through write_dataset to JSON.
    data = [{"a": 1}, {"a": 2}]
    path = tmp_path / "out.json"
    write_dataset(data, path)
    with open(path, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    assert loaded[1]["a"] == 2


# --- tests/test_gestione_domande_view.py ---
import os
import sys
import importlib
import pandas as pd
import pytest

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from views.state_models import QuestionPageState


@pytest.fixture
def gestione_domande(monkeypatch):
    # Load the Streamlit view module from file so its module-level code runs
    # against a stubbed controllers.load_questions.
    import controllers
    import importlib.util
    from pathlib import Path

    monkeypatch.setattr(controllers, "load_questions", lambda: pd.DataFrame())

    base_path = Path(__file__).resolve().parents[1] / "views"
    style_spec = importlib.util.spec_from_file_location(
        "views.style_utils", base_path / "style_utils.py"
    )
    style_mod = importlib.util.module_from_spec(style_spec)
    sys.modules["views.style_utils"] = style_mod
    style_spec.loader.exec_module(style_mod)

    module_path = base_path / "gestione_domande.py"
    spec = importlib.util.spec_from_file_location(
        "views.gestione_domande", module_path
    )
    module = importlib.util.module_from_spec(spec)
    sys.modules["views.gestione_domande"] = module
    spec.loader.exec_module(module)
    return module


class DummySessionState(dict):
    # Mimics Streamlit's session_state: attribute access backed by a dict,
    # with missing attributes resolving to None instead of raising.
    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value


class DummyContext:
    # No-op context manager standing in for Streamlit layout containers.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False


class DummySt:
    # Minimal stand-in for the streamlit module used by the view under test.
    def __init__(self):
        self.session_state = DummySessionState()
        self.button_returns: list[bool] = []
        self.rerun_called = False

    def write(self, *args, **kwargs):
        pass

    def warning(self, *args, **kwargs):
        pass

    def error(self, *args, **kwargs):
        pass

    def success(self, *args, **kwargs):
        pass

    def button(self, *args, **kwargs):
        # Scripted button presses: pop the next queued return value.
        if self.button_returns:
            return self.button_returns.pop(0)
        return False

    def columns(self, n):
        return (DummyContext(), DummyContext())

    def rerun(self):
        self.rerun_called = True


def _setup(monkeypatch, gestione_domande):
    # Swap the view's streamlit module for the scripted double.
    dummy_st = DummySt()
    monkeypatch.setattr(gestione_domande, "st", dummy_st)
    return dummy_st


def test_create_save_question_callback_success(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)
    questions_df = pd.DataFrame({"id": [1]})

    def fake_save_question_action(*_args):
        return {"success": True, "questions_df": questions_df}

    monkeypatch.setattr(
        gestione_domande, "save_question_action", fake_save_question_action
    )

    cb = gestione_domande.create_save_question_callback("1", "q", "a", "cat")
    cb()

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.save_success is True
    assert state.save_success_message == "Domanda salvata."
    assert state.trigger_rerun is True
    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)


def test_create_save_question_callback_failure(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)

    def fake_save_question_action(*_args):
        return {"success": False}

    monkeypatch.setattr(
        gestione_domande, "save_question_action", fake_save_question_action
    )

    cb = gestione_domande.create_save_question_callback("1", "q", "a", "cat")
    cb()

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.save_error is True
    assert state.save_error_message == "Domanda non salvata."
    assert state.trigger_rerun is False
def test_import_questions_callback_success(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)
    dummy_st.session_state.uploaded_file_content = object()
    questions_df = pd.DataFrame({"id": [1]})

    def fake_import_questions_action(_file):
        return {"questions_df": questions_df, "imported_count": 2, "warnings": []}

    monkeypatch.setattr(
        gestione_domande, "import_questions_action", fake_import_questions_action
    )

    gestione_domande.import_questions_callback()

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.import_success is True
    assert "2" in state.import_success_message
    # The uploader widget key is cleared so the file is not re-imported.
    assert dummy_st.session_state.upload_questions_file is None
    assert "upload_questions_file" not in dummy_st.session_state
    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)


def test_import_questions_callback_error(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)
    dummy_st.session_state.uploaded_file_content = object()

    def fake_import_questions_action(_file):
        raise Exception("bad")

    monkeypatch.setattr(
        gestione_domande, "import_questions_action", fake_import_questions_action
    )

    gestione_domande.import_questions_callback()

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.import_error is True
    assert state.import_error_message == "bad"
    assert "upload_questions_file" not in dummy_st.session_state
    assert dummy_st.session_state.uploaded_file_content is None


def test_confirm_delete_question_dialog_success(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)
    # First button press confirms, second (cancel) is never consulted.
    dummy_st.button_returns = [True, False]
    questions_df = pd.DataFrame({"id": [1]})

    def fake_delete_question_action(_id):
        return questions_df

    monkeypatch.setattr(
        gestione_domande, "delete_question_action", fake_delete_question_action
    )

    # __wrapped__ bypasses the st.dialog decorator so the body runs directly.
    gestione_domande.confirm_delete_question_dialog.__wrapped__(1, "q1")

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.delete_success is True
    assert state.trigger_rerun is True
    assert dummy_st.rerun_called is True
    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)


def test_confirm_delete_question_dialog_error(monkeypatch, gestione_domande):
    dummy_st = _setup(monkeypatch, gestione_domande)
    dummy_st.button_returns = [True, False]

    def fake_delete_question_action(_id):
        raise Exception("fail")

    monkeypatch.setattr(
        gestione_domande, "delete_question_action", fake_delete_question_action
    )

    gestione_domande.confirm_delete_question_dialog.__wrapped__(1, "q1")

    state: QuestionPageState = dummy_st.session_state.question_page_state
    assert state.save_error is True
    assert state.save_error_message == "fail"
    assert dummy_st.rerun_called is True


# --- tests/test_gestione_set_view.py ---
import os
import sys
import pandas as pd

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from views import set_helpers
from views.state_models import SetPageState
from models.question_set import PersistSetsResult


class DummySessionState(dict):
    # Attribute-style dict double for Streamlit's session_state.
    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value


class DummyContext:
    # No-op context manager standing in for Streamlit layout containers.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False


class DummySt:
    # Minimal scripted replacement for the streamlit module.
    def __init__(self):
        self.session_state = DummySessionState()
        self.button_returns: list[bool] = []
        self.rerun_called = False

    def write(self, *args, **kwargs):
        pass

    def button(self, *args, **kwargs):
        if self.button_returns:
            return self.button_returns.pop(0)
        return False

    def columns(self, n):
        return (DummyContext(), DummyContext())

    def warning(self, *args, **kwargs):
        pass

    def rerun(self):
        self.rerun_called = True


def _setup(monkeypatch):
    dummy_st = DummySt()
    monkeypatch.setattr(set_helpers, "st", dummy_st)
    return dummy_st


def test_create_save_set_callback_success(monkeypatch):
    dummy_st = _setup(monkeypatch)
    dummy_st.session_state.set_expanders = {}
    dummy_st.session_state.question_checkboxes = {"1": {"2": True}}
    dummy_st.session_state.newly_selected_questions = {"1": ["3"]}
    dummy_st.session_state.set_name_1 = "Name"
    state = SetPageState()

    captured = {}

    def fake_save_set_callback(set_id, name, options, new_ids, st_state):
        captured["args"] = (set_id, name, options, new_ids)
        st_state.save_set_success = True

    monkeypatch.setattr(set_helpers, "save_set_callback", fake_save_set_callback)

    cb = set_helpers.create_save_set_callback("1", "exp1", state)
    cb()

    # The expander is re-opened and the inner callback receives the
    # session-state-derived arguments.
    assert dummy_st.session_state.set_expanders["exp1"] is True
    assert captured["args"] == ("1", "Name", {"2": True}, ["3"])
    assert state.save_set_success is True


def test_create_save_set_callback_error(monkeypatch):
    dummy_st = _setup(monkeypatch)
    dummy_st.session_state.set_expanders = {}
    dummy_st.session_state.question_checkboxes = {"1": {}}
    dummy_st.session_state.newly_selected_questions = {"1": []}
    dummy_st.session_state.set_name_1 = "Name"
    state = SetPageState()

    def fake_save_set_callback(set_id, name, options, new_ids, st_state):
        st_state.save_set_error = True
        st_state.save_set_error_message = "boom"

    monkeypatch.setattr(set_helpers, "save_set_callback", fake_save_set_callback)

    cb = set_helpers.create_save_set_callback("1", "exp1", state)
    cb()

    assert dummy_st.session_state.set_expanders["exp1"] is True
    assert state.save_set_error is True
    assert state.save_set_error_message == "boom"
def test_import_set_callback_success(monkeypatch):
    dummy_st = _setup(monkeypatch)
    dummy_st.session_state.uploaded_file_content_set = object()

    result = PersistSetsResult(
        sets_df=pd.DataFrame({"id": [1]}),
        questions_df=pd.DataFrame({"id": [2]}),
        sets_imported_count=1,
        new_questions_added_count=0,
        existing_questions_found_count=0,
        warnings=["warn"],
    )

    monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _f: result)

    state = SetPageState()
    set_helpers.import_set_callback(state)

    assert state.import_set_success is True
    assert state.import_set_success_message == "1 set importati."
    assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame)
    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)
    # The pending upload is consumed.
    assert dummy_st.session_state.uploaded_file_content_set is None


def test_import_set_callback_error(monkeypatch):
    dummy_st = _setup(monkeypatch)
    dummy_st.session_state.uploaded_file_content_set = object()
    dummy_st.session_state.upload_set_file = object()

    def fake_import_from_file(_f):
        raise Exception("fail")

    monkeypatch.setattr(
        set_helpers.QuestionSet, "import_from_file", fake_import_from_file
    )

    state = SetPageState()
    set_helpers.import_set_callback(state)

    assert state.import_set_error is True
    assert state.import_set_error_message == "fail"
    assert dummy_st.session_state.uploaded_file_content_set is None
    assert "upload_set_file" not in dummy_st.session_state


def test_confirm_delete_set_dialog_success(monkeypatch):
    dummy_st = _setup(monkeypatch)
    # Confirm press first; cancel is never reached.
    dummy_st.button_returns = [True, False]
    state = SetPageState()

    def fake_delete_set_callback(set_id, st_state):
        st_state.delete_set_success = True
        dummy_st.session_state.question_sets = pd.DataFrame({"id": [1]})

    monkeypatch.setattr(set_helpers, "delete_set_callback", fake_delete_set_callback)

    # __wrapped__ bypasses the st.dialog decorator.
    set_helpers.confirm_delete_set_dialog.__wrapped__("1", "name", state)

    assert state.delete_set_success is True
    assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame)
    assert dummy_st.rerun_called is True


def test_confirm_delete_set_dialog_error(monkeypatch):
    dummy_st = _setup(monkeypatch)
    dummy_st.button_returns = [True, False]
    state = SetPageState()

    def fake_delete_set_callback(set_id, st_state):
        st_state.save_set_error = True
        st_state.save_set_error_message = "bad"

    monkeypatch.setattr(set_helpers, "delete_set_callback", fake_delete_set_callback)

    set_helpers.confirm_delete_set_dialog.__wrapped__("1", "name", state)

    assert state.save_set_error is True
    assert state.save_set_error_message == "bad"
    assert dummy_st.rerun_called is True


# --- tests/test_home_view.py ---
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from views import home


class DummyColumn:
    # Column double that forwards markdown calls to its parent DummySt.
    def __init__(self, parent):
        self.parent = parent

    def markdown(self, text, **kwargs):
        self.parent.markdown(text, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False


class DummySt:
    # Records every markdown call so the test can assert on rendered content.
    def __init__(self):
        self.markdown_calls = []

    def markdown(self, text, **kwargs):
        self.markdown_calls.append(text)

    def columns(self, n):
        return (DummyColumn(self), DummyColumn(self))


def test_home_render_injects_styles_and_content(monkeypatch):
    dummy_st = DummySt()
    monkeypatch.setattr(home, 'st', dummy_st)

    called = {'home_styles': False}

    def fake_add_home_styles():
        called['home_styles'] = True

    monkeypatch.setattr(home, 'add_home_styles', fake_add_home_styles)

    home.render()

    assert called['home_styles'] is True
    assert any('Piattaforma di Valutazione LLM' in m for m in dummy_st.markdown_calls)
# --- tests/test_import_results.py ---
import os
import sys

import pandas as pd
import pytest

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from models.test_result import TestResult, test_result_importer


# Shared fixture directory with canned result files.
data_dir = os.path.join(os.path.dirname(__file__), "sample_data")


@pytest.mark.parametrize("filename", ["test_results.csv", "test_results.json"])
def test_import_from_file_skips_duplicates_and_saves(mocker, filename):
    mock_load = mocker.patch("models.test_result.TestResult.load_all_df")
    mock_save = mocker.patch("models.test_result.TestResult.save")
    mock_refresh = mocker.patch("models.test_result.TestResult.refresh_cache")
    # One result with id "1" already exists before the import.
    mock_load.return_value = pd.DataFrame(
        [{"id": "1", "set_id": "s1", "timestamp": "t0", "results": {}}]
    )
    with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f:
        result = test_result_importer.import_from_file(f)

    assert result["success"] is True
    assert result["message"] == "Importati 1 risultati."
    mock_save.assert_called_once()
    saved = mock_save.call_args[0][0]
    assert {r.id for r in saved} == {"1", "2"}
    mock_refresh.assert_called_once()


# --- tests/test_importer_export.py ---
import pandas as pd
from models.question import Question, question_importer
from models.question_set import QuestionSet, question_set_importer
from models.test_result import TestResult, test_result_importer


def test_question_gather_data(mocker):
    # gather_data flattens loaded questions into a plain DataFrame.
    mocker.patch(
        "models.question.Question.load_all",
        return_value=[Question(id="1", domanda="d", risposta_attesa="a", categoria="c")],
    )
    df = question_importer.gather_data()
    assert df.to_dict(orient="records") == [
        {"id": "1", "domanda": "d", "risposta_attesa": "a", "categoria": "c"}
    ]


def test_question_set_gather_data(mocker):
    # Set export embeds the full question payload, not just its id.
    mocker.patch(
        "models.question.Question.load_all",
        return_value=[Question(id="1", domanda="d", risposta_attesa="a", categoria="c")],
    )
    mocker.patch(
        "models.question_set.QuestionSet.load_all",
        return_value=[QuestionSet(id="s1", name="S1", questions=["1"])]
    )
    data = question_set_importer.gather_data()
    assert data == [
        {"name": "S1", "questions": [
            {"id": "1", "domanda": "d", "risposta_attesa": "a", "categoria": "c"}
        ]}
    ]


def test_test_result_gather_data(mocker):
    # Result export is a pass-through of the loaded DataFrame.
    df = pd.DataFrame([
        {"id": "1", "set_id": "s", "timestamp": "t", "results": {}}
    ])
    mocker.patch(
        "models.test_result.TestResult.load_all_df",
        return_value=df,
    )
    result = test_result_importer.gather_data()
    assert result.equals(df)


# --- tests/test_initialize_db.py ---
import logging
import runpy


def test_initialize_db_logs_success(mocker, caplog):
    mock_engine = mocker.MagicMock()
    mocker.patch("models.database.DatabaseEngine.instance", return_value=mock_engine)
    mocker.patch("utils.startup_utils.setup_logging")
    caplog.set_level(logging.INFO)

    # Execute the script exactly as `python initialize_db.py` would.
    runpy.run_module("initialize_db", run_name="__main__")

    mock_engine.init_db.assert_called_once_with()
    assert any(
        "Database inizializzato con successo" in record.message
        for record in caplog.records
    )


def test_initialize_db_logs_error(mocker, caplog):
    mock_engine = mocker.MagicMock()
    mock_engine.init_db.side_effect = Exception("boom")
    mocker.patch("models.database.DatabaseEngine.instance", return_value=mock_engine)
    mocker.patch("utils.startup_utils.setup_logging")
    caplog.set_level(logging.ERROR)

    runpy.run_module("initialize_db", run_name="__main__")

    mock_engine.init_db.assert_called_once_with()
    assert any(
        "Errore durante l'inizializzazione del database" in record.message
        for record in caplog.records
    )
# --- tests/test_models_cached_data.py ---
import models.cached_data as cached_data


def test_get_questions(monkeypatch):
    # The cache wrapper delegates to Question.load_all exactly once.
    called = {}

    def fake_load_all():
        called['done'] = True
        return [1]

    monkeypatch.setattr(cached_data.Question, 'load_all', staticmethod(fake_load_all))
    assert cached_data.get_questions() == [1]
    assert called


def test_get_question_sets(monkeypatch):
    called = {}

    def fake_load_all():
        called['done'] = True
        return ['set']

    monkeypatch.setattr(cached_data.QuestionSet, 'load_all', staticmethod(fake_load_all))
    assert cached_data.get_question_sets() == ['set']
    assert called


def test_get_api_presets(monkeypatch):
    called = {}

    def fake_load_all():
        called['done'] = True
        return ['preset']

    monkeypatch.setattr(cached_data.APIPreset, 'load_all', staticmethod(fake_load_all))
    assert cached_data.get_api_presets() == ['preset']
    assert called


def test_get_results(monkeypatch):
    called = {}

    def fake_load_all():
        called['done'] = True
        return ['result']

    monkeypatch.setattr(cached_data.TestResult, 'load_all', staticmethod(fake_load_all))
    assert cached_data.get_results() == ['result']
    assert called


# --- tests/test_models_database.py ---
import pytest
from types import SimpleNamespace

from models import database
from models.database import DatabaseEngine


def test_get_engine_uses_config_and_create_engine(monkeypatch):
    DatabaseEngine.reset_instance()  # ensure a clean singleton
    db = DatabaseEngine.instance()
    fake_cfg = {'user': 'u', 'password': 'p', 'host': 'h', 'database': 'db'}
    monkeypatch.setattr(DatabaseEngine, '_load_config', lambda self: fake_cfg)
    called = {}

    def fake_ensure(self, cfg):
        called['ensure'] = cfg
    monkeypatch.setattr(DatabaseEngine, '_ensure_database', fake_ensure)
    fake_engine = SimpleNamespace(dispose=lambda: None)

    def fake_create_engine(url, pool_pre_ping=True, pool_recycle=3600):
        called['url'] = url
        return fake_engine
    monkeypatch.setattr(database, 'create_engine', fake_create_engine)

    engine = db.get_engine()
    assert engine is fake_engine
    assert called['ensure'] == fake_cfg
    assert 'mysql+pymysql://u:p@h:3306/db' in called['url']
    # A second call should reuse the same engine.
    assert db.get_engine() is fake_engine
def test_ensure_database_error(monkeypatch):
    DatabaseEngine.reset_instance()
    db = DatabaseEngine.instance()

    class DummyEngine:
        def begin(self):
            raise Exception('boom')

    monkeypatch.setattr(database, 'create_engine', lambda *a, **k: DummyEngine())
    # A connection failure during bootstrap is wrapped in RuntimeError.
    with pytest.raises(RuntimeError):
        db._ensure_database({
            'user': 'u',
            'password': 'p',
            'host': 'h',
            'database': 'd',
        })


# --- tests/test_models_orm.py ---
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from models.orm_models import (
    Base,
    QuestionORM,
    QuestionSetORM,
    TestResultORM as ResultORM,
    APIPresetORM,
    question_set_questions,
)


def test_orm_tables_and_relationships():
    # Exercise the full schema against an in-memory SQLite database.
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        question = QuestionORM(
            id="q1",
            domanda="2+2?",
            risposta_attesa="4",
            categoria="math",
        )
        qset = QuestionSetORM(id="s1", name="Sample Set", questions=[question])
        result = ResultORM(
            id="r1",
            set_id="s1",
            timestamp="2024-01-01T00:00:00",
            results={"q1": "4"},
        )
        preset = APIPresetORM(
            id="a1",
            name="default",
            provider_name="openai",
            endpoint="http://api.example",
            api_key="secret",
            model="gpt",
            temperature=0.1,
            max_tokens=10,
        )
        session.add_all([qset, result, preset])
        session.commit()

        assert session.get(QuestionORM, "q1").domanda == "2+2?"
        assert session.get(QuestionSetORM, "s1").questions[0].id == "q1"
        assert session.get(ResultORM, "r1").results == {"q1": "4"}
        assert session.get(APIPresetORM, "a1").model == "gpt"

    # Column names
    assert set(QuestionORM.__table__.columns.keys()) == {
        "id",
        "domanda",
        "risposta_attesa",
        "categoria",
    }
    assert set(QuestionSetORM.__table__.columns.keys()) == {"id", "name"}
    assert set(ResultORM.__table__.columns.keys()) == {
        "id",
        "set_id",
        "timestamp",
        "results",
    }
    assert set(APIPresetORM.__table__.columns.keys()) == {
        "id",
        "name",
        "provider_name",
        "endpoint",
        "api_key",
        "model",
        "temperature",
        "max_tokens",
    }
    assert set(question_set_questions.c.keys()) == {"set_id", "question_id"}

    # Foreign keys of the association table point at both parent tables.
    fk_set = list(question_set_questions.c.set_id.foreign_keys)[0]
    fk_question = list(question_set_questions.c.question_id.foreign_keys)[0]
    assert fk_set.column.table.name == "question_sets"
    assert fk_question.column.table.name == "questions"

    # Metadata consistency: every table is registered exactly once.
    for name, table in [
        ("questions", QuestionORM.__table__),
        ("question_sets", QuestionSetORM.__table__),
        ("test_results", ResultORM.__table__),
        ("api_presets", APIPresetORM.__table__),
        ("question_set_questions", question_set_questions),
    ]:
        assert name in Base.metadata.tables
        assert Base.metadata.tables[name] is table


# --- tests/test_models_question.py ---
import pandas as pd

from models.question import Question
from models.orm_models import QuestionORM
from models.database import DatabaseEngine


def test_add_and_update_question(in_memory_db):
    qid = Question.add('d1', 'r1', 'c1')
    with DatabaseEngine.instance().get_session() as session:
        q = session.get(QuestionORM, qid)
        assert q.domanda == 'd1'
    assert Question.update(qid, domanda='d2', categoria='c2') is True
    with DatabaseEngine.instance().get_session() as session:
        q = session.get(QuestionORM, qid)
        assert q.domanda == 'd2'
        assert q.categoria == 'c2'
    # Updating a missing id reports failure instead of raising.
    assert Question.update('missing', domanda='x') is False
def test_persist_entities_handles_duplicates(in_memory_db):
    existing_id = Question.add('d', 'r', 'c')
    df = pd.DataFrame([
        {'id': existing_id, 'domanda': 'd', 'risposta_attesa': 'r', 'categoria': 'c'},
        {'id': 'new1', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': 'c2'},
        {'id': 'new1', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': 'c2'},
    ])
    # Only the first 'new1' row is persisted; the pre-existing id is reported.
    count, warnings = Question._persist_entities(df)
    assert count == 1
    assert len(warnings) == 1
    assert 'già esistente' in warnings[0]
    with DatabaseEngine.instance().get_session() as session:
        assert session.get(QuestionORM, 'new1') is not None


# --- tests/test_models_question_set.py ---
import pandas as pd

from models.question import Question
from models.question_set import QuestionSet
from models.orm_models import QuestionSetORM
from models.database import DatabaseEngine


def test_create_and_update_question_set(in_memory_db):
    qid1 = Question.add('d1', 'r1')
    qid2 = Question.add('d2', 'r2')
    set_id = QuestionSet.create('set1', [qid1])
    with DatabaseEngine.instance().get_session() as session:
        qset = session.get(QuestionSetORM, set_id)
        assert qset.name == 'set1'
        assert [q.id for q in qset.questions] == [qid1]
    QuestionSet.update(set_id, name='set2', question_ids=[qid2])
    with DatabaseEngine.instance().get_session() as session:
        qset = session.get(QuestionSetORM, set_id)
        assert qset.name == 'set2'
        assert [q.id for q in qset.questions] == [qid2]
    # update of missing set should not raise
    QuestionSet.update('missing', name='x')


def test_resolve_question_ids(monkeypatch, in_memory_db):
    current_questions = pd.DataFrame([
        {'id': '1', 'domanda': 'd1', 'risposta_attesa': 'r1', 'categoria': ''}
    ])
    # Mixed payload: a known id, a full new question, and an incomplete one.
    data = ['1', {'id': '2', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': ''}, {'id': '3'}]
    monkeypatch.setattr(
        'controllers.question_controller.add_question_if_not_exists',
        lambda **kwargs: True,
    )
    ids, updated, new_added, existing_found, warnings = QuestionSet._resolve_question_ids(
        data, current_questions
    )
    assert ids == ['1', '2']
    assert new_added == 1
    assert existing_found == 1
    assert len(warnings) == 1
    assert 'saltata' in warnings[0]
    assert '2' in updated['id'].values


# --- tests/test_models_test_result.py ---
import pandas as pd

from models.test_result import TestResult
from models.orm_models import TestResultORM
from models.database import DatabaseEngine


def test_add_and_persist_entities(in_memory_db):
    TestResult.load_all_df.cache_clear()
    existing_id = TestResult.add('set1', {'timestamp': 't1'})
    TestResult.load_all_df.cache_clear()
    df = pd.DataFrame([
        {'id': existing_id, 'set_id': 'set1', 'timestamp': 't1', 'results': {}},
        {'id': 'new', 'set_id': 'set2', 'timestamp': 't2', 'results': {}},
    ])
    # Only the unseen id is persisted.
    added = TestResult._persist_entities(df)
    assert added == 1
    with DatabaseEngine.instance().get_session() as session:
        assert session.get(TestResultORM, 'new') is not None


def test_calculate_statistics():
    data = {
        'q1': {'question': 'Q1', 'evaluation': {'score': 1, 'similarity': 2, 'correctness': 3, 'completeness': 4}},
        'q2': {'question': 'Q2', 'evaluation': {'score': 3, 'similarity': 6, 'correctness': 9, 'completeness': 12}},
    }
    stats = TestResult.calculate_statistics(data)
    assert stats['avg_score'] == 2
    assert stats['radar_metrics']['similarity'] == 4
    assert len(stats['per_question_scores']) == 2
    # An empty payload yields all-zero statistics.
    assert TestResult.calculate_statistics({}) == {
        'avg_score': 0,
        'per_question_scores': [],
        'radar_metrics': {'similarity': 0, 'correctness': 0, 'completeness': 0},
    }
# --- tests/test_openai_client.py ---
import logging
import os
import sys
from types import SimpleNamespace

import pytest

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from utils.openai_client import (  # noqa: E402
    DEFAULT_MODEL,
    ClientCreationError,
    get_available_models_for_endpoint,
    get_openai_client,
)


def test_get_openai_client_no_api_key(caplog):
    # A missing key is both raised and logged as a warning.
    caplog.set_level(logging.WARNING)
    with pytest.raises(ClientCreationError):
        get_openai_client("")
    assert "Tentativo di creare client OpenAI senza chiave API." in caplog.text


def test_get_openai_client_creation_failure(mocker):
    # Any constructor error is wrapped in ClientCreationError.
    mock_openai = mocker.patch(
        "utils.openai_client.OpenAI", side_effect=RuntimeError("boom")
    )
    with pytest.raises(ClientCreationError):
        get_openai_client("key")
    mock_openai.assert_called_once()


def test_get_openai_client_uses_custom_base_url(mocker):
    mock_openai = mocker.patch("utils.openai_client.OpenAI")
    mock_client = mocker.MagicMock()
    mock_openai.return_value = mock_client

    result = get_openai_client("key", base_url="http://custom")

    mock_openai.assert_called_once_with(api_key="key", base_url="http://custom")
    assert result is mock_client


def test_get_available_models_returns_error_when_no_client(mocker):
    # When the client cannot be built the list degrades to an error marker
    # plus the default model.
    mocker.patch(
        "utils.openai_client.get_openai_client",
        side_effect=ClientCreationError("boom"),
    )
    models = get_available_models_for_endpoint(
        "Personalizzato", endpoint_url="http://endpoint", api_key="key"
    )
    assert models[0] == "(Errore creazione client API)"
    assert DEFAULT_MODEL in models


def test_get_available_models_filters_embeddings(mocker):
    # Embedding models are filtered out of the selectable list.
    mock_get_client = mocker.patch("utils.openai_client.get_openai_client")
    dummy_models = [
        SimpleNamespace(id="gpt-4o"),
        SimpleNamespace(id="text-embedding-3-small"),
        SimpleNamespace(id="chat-model"),
        SimpleNamespace(id="my-embedding-model"),
    ]

    dummy_client = SimpleNamespace(models=SimpleNamespace(list=lambda: dummy_models))
    mock_get_client.return_value = dummy_client

    models = get_available_models_for_endpoint(
        "Personalizzato", endpoint_url="http://endpoint", api_key="key"
    )

    assert "text-embedding-3-small" not in models
    assert "my-embedding-model" not in models
    assert "gpt-4o" in models and "chat-model" in models


# --- tests/test_openai_controllers.py ---
import os
import sys

import pytest

from utils.openai_client import ClientCreationError

# Make the project root importable when running the tests directly.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from controllers import api_preset_controller  # noqa: E402
from controllers.test_controller import generate_answer  # noqa: E402


def _mock_response(mocker, content: str):
    """Build a mocked completion response carrying the given content."""
    mock_resp = mocker.Mock()
    mock_choice = mocker.Mock()
    mock_choice.message = mocker.Mock()
    mock_choice.message.content = content
    mock_resp.choices = [mock_choice]
    return mock_resp


def test_generate_answer_success(mocker):
    # Whitespace around the model output is stripped.
    mock_get_client = mocker.patch("utils.openai_client.get_openai_client")
    mock_client = mocker.Mock()
    mock_get_client.return_value = mock_client
    mock_client.chat.completions.create.return_value = _mock_response(
        mocker, " answer "
    )

    result = generate_answer("question", {"api_key": "key"})

    assert result == "answer"
def test_generate_answer_no_client(mocker):
    mocker.patch(
        "utils.openai_client.get_openai_client",
        side_effect=ClientCreationError("boom"),
    )
    with pytest.raises(ValueError):
        generate_answer("question", {"api_key": None})


def test_generate_answer_empty_question(mocker):
    mock_get_client = mocker.patch("utils.openai_client.get_openai_client")
    mock_get_client.return_value = mocker.Mock()

    with pytest.raises(ValueError):
        generate_answer("", {"api_key": "key"})


def test_test_api_connection_success(mocker):
    mock_get_client = mocker.patch("utils.openai_client.get_openai_client")
    mock_client = mocker.Mock()
    mock_get_client.return_value = mock_client
    mock_client.chat.completions.create.return_value = _mock_response(
        mocker, "Connessione riuscita."
    )

    ok, msg = api_preset_controller.test_api_connection(
        "key", "endpoint", "model", 0.1, 10
    )

    assert ok is True
    assert msg == "Connessione API riuscita!"


def test_test_api_connection_unexpected_response(mocker):
    # Any content other than the expected ping reply counts as failure.
    mock_get_client = mocker.patch("utils.openai_client.get_openai_client")
    mock_client = mocker.Mock()
    mock_get_client.return_value = mock_client
    mock_client.chat.completions.create.return_value = _mock_response(
        mocker, "failure"
    )

    ok, msg = api_preset_controller.test_api_connection(
        "key", "endpoint", "model", 0.1, 10
    )

    assert ok is False
    assert "Risposta inattesa" in msg


def test_test_api_connection_no_client(mocker):
    mocker.patch(
        "utils.openai_client.get_openai_client",
        side_effect=ClientCreationError("boom"),
    )
    ok, msg = api_preset_controller.test_api_connection(
        "key", "endpoint", "model", 0.1, 10
    )

    assert ok is False
    assert "Client API non inizializzato" in msg


# --- tests/test_question_controller.py ---
import os
import sys

import pandas as pd
import pytest

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from controllers import question_controller  # noqa: E402


def test_add_question_if_not_exists_existing(mocker):
    # An already-known id is neither re-added nor does it refresh the cache.
    mock_refresh = mocker.patch("controllers.question_controller.refresh_questions")
    mock_add = mocker.patch("controllers.question_controller.Question.add")
    mock_load_questions = mocker.patch(
        "controllers.question_controller.load_questions"
    )
    mock_load_questions.return_value = pd.DataFrame({"id": ["123"]})

    result = question_controller.add_question_if_not_exists(
        question_id="123",
        domanda="dom",
        risposta_attesa="ans",
        categoria="cat",
    )

    assert result is False
    mock_add.assert_not_called()
    mock_refresh.assert_not_called()


def test_add_question_if_not_exists_new(mocker):
    mock_refresh = mocker.patch("controllers.question_controller.refresh_questions")
    mock_add = mocker.patch("controllers.question_controller.Question.add")
    mock_load_questions = mocker.patch(
        "controllers.question_controller.load_questions"
    )
    mock_load_questions.return_value = pd.DataFrame({"id": ["456"]})

    result = question_controller.add_question_if_not_exists(
        question_id="123",
        domanda="dom",
        risposta_attesa="ans",
        categoria="cat",
    )

    assert result is True
    mock_add.assert_called_once_with("dom", "ans", "cat", "123")
    mock_refresh.assert_called_once()


def test_add_question(mocker):
    mock_refresh = mocker.patch("controllers.question_controller.refresh_questions")
    mock_add = mocker.patch("controllers.question_controller.Question.add")
    mock_add.return_value = "qid"

    result = question_controller.add_question("dom", "ans", "cat", "qid")

    assert result == "qid"
    mock_add.assert_called_once_with("dom", "ans", "cat", "qid")
    mock_refresh.assert_called_once()


def test_update_question(mocker):
    mock_refresh = mocker.patch("controllers.question_controller.refresh_questions")
    mock_update = mocker.patch("controllers.question_controller.Question.update")
    mock_update.return_value = True

    result = question_controller.update_question("qid", "dom", "ans", "cat")

    assert result is True
    mock_update.assert_called_once_with("qid", "dom", "ans", "cat")
    mock_refresh.assert_called_once()
mocker.patch("controllers.question_controller.Question.update") + mock_update.return_value = True + + result = question_controller.update_question("qid", "dom", "ans", "cat") + + assert result is True + mock_update.assert_called_once_with("qid", "dom", "ans", "cat") + mock_refresh.assert_called_once() + + +def test_delete_question(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_delete = mocker.patch("controllers.question_controller.Question.delete") + question_controller.delete_question("qid") + + mock_delete.assert_called_once_with("qid") + mock_refresh.assert_called_once() +def test_get_filtered_questions(mocker): + mock_filter = mocker.patch( + "controllers.question_controller.Question.filter_by_category" + ) + df = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + "categoria": ["cat1"], + } + ) + mock_filter.return_value = (df, ["cat1", "cat2"]) + + questions, categories = question_controller.get_filtered_questions("cat1") + mock_filter.assert_called_once_with("cat1") + assert categories == ["cat1", "cat2"] + assert questions["id"].tolist() == ["1"] + + +def test_filter_by_category(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") + mock_get_questions.return_value = pd.DataFrame( + { + "id": ["1", "2"], + "domanda": ["d1", "d2"], + "risposta_attesa": ["a1", "a2"], + "categoria": ["cat1", "cat2"], + } + ) + + filtered_df, categories = question_controller.Question.filter_by_category("cat1") + assert categories == ["cat1", "cat2"] + assert filtered_df["id"].tolist() == ["1"] + + +def test_filter_by_category_no_category_column(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") + mock_get_questions.return_value = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + } + ) + + filtered_df, categories = question_controller.Question.filter_by_category() + assert "categoria" in filtered_df.columns + assert 
filtered_df.iloc[0]["categoria"] == "N/A" + assert categories == ["N/A"] + + +def test_filter_by_category_empty_df(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") + mock_get_questions.return_value = pd.DataFrame() + + filtered_df, categories = question_controller.Question.filter_by_category() + assert filtered_df.empty + assert categories == [] + + +def test_export_questions_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.question_controller.question_importer.export_to_file" + ) + dest = tmp_path / "qs.csv" + question_controller.export_questions_action(dest) + mock_export.assert_called_once_with(dest) + + +def test_get_question_text_found(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") + mock_load.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) + text = question_controller.get_question_text("1") + mock_refresh.assert_not_called() + assert text == "Q1" + + +def test_get_question_text_refresh(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") + mock_load.return_value = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) + text = question_controller.get_question_text("1") + mock_refresh.assert_called_once() + assert text == "Q1" + + +def test_get_question_category_found(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") + mock_load.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) + cat = question_controller.get_question_category("1") + mock_refresh.assert_not_called() + assert cat == "C1" + + +def test_get_question_category_refresh(mocker): + mock_refresh = 
mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") + mock_load.return_value = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) + cat = question_controller.get_question_category("1") + mock_refresh.assert_called_once() + assert cat == "C1" + + +def test_save_question_action_success(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_update = mocker.patch("controllers.question_controller.update_question") + mock_update.return_value = True + df = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = df + + result = question_controller.save_question_action("1", "q", "a", "c") + + mock_update.assert_called_once_with( + "1", domanda="q", risposta_attesa="a", categoria="c" + ) + mock_refresh.assert_called_once() + assert result["success"] is True + assert result["questions_df"].equals(df) + + +def test_save_question_action_failure(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_update = mocker.patch("controllers.question_controller.update_question") + mock_update.return_value = False + result = question_controller.save_question_action("1", "q", "a", "c") + + mock_refresh.assert_not_called() + assert result["success"] is False + assert result["questions_df"] is None + + +def test_delete_question_action(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_delete = mocker.patch("controllers.question_controller.delete_question") + df = pd.DataFrame() + mock_refresh.return_value = df + + result = question_controller.delete_question_action("1") + + mock_delete.assert_called_once_with("1") + mock_refresh.assert_called_once() + assert result.equals(df) + + +def test_import_questions_action_success(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") 
+ mock_import = mocker.patch( + "controllers.question_controller.question_importer.import_from_file" + ) + mock_import.return_value = { + "success": True, + "imported_count": 1, + "warnings": ["w"], + } + df = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = df + + uploaded_file = object() + result = question_controller.import_questions_action(uploaded_file) + + mock_import.assert_called_once_with(uploaded_file) + mock_refresh.assert_called_once() + assert result["imported_count"] == 1 + assert result["warnings"] == ["w"] + assert result["questions_df"].equals(df) + + +def test_import_questions_action_no_file(): + with pytest.raises(ValueError, match="Nessun file caricato."): + question_controller.import_questions_action(None) + + +def test_import_questions_action_failure(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_import = mocker.patch( + "controllers.question_controller.question_importer.import_from_file" + ) + mock_import.return_value = { + "success": False, + "imported_count": 0, + "warnings": ["err"], + } + + with pytest.raises(ValueError, match="err"): + question_controller.import_questions_action(object()) + + assert mock_import.return_value["imported_count"] == 0 + assert mock_import.return_value["warnings"] == ["err"] + mock_refresh.assert_not_called() diff --git a/tests/test_question_import.py b/tests/test_question_import.py new file mode 100644 index 0000000..50b4077 --- /dev/null +++ b/tests/test_question_import.py @@ -0,0 +1,64 @@ +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from models.question import question_importer + + +class DummySession: + def __init__(self): + self.inserted = [] + + def execute(self, *_args, **_kwargs): + class Result: + def scalars(self_inner): + class Scal: + def all(self_inner2): + return ["q1"] + return Scal() + return Result() + + def bulk_insert_mappings(self, _orm, data): + self.inserted.extend(data) + + def 
commit(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + pass + + +class DummyEngine: + def __init__(self): + self.session = DummySession() + + def get_session(self): + return self.session + + +def test_import_from_file_skips_duplicates_and_adds_new(mocker): + engine = DummyEngine() + mock_engine = mocker.patch("models.question.DatabaseEngine.instance") + mock_engine.return_value = engine + data_dir = os.path.join(os.path.dirname(__file__), "sample_data") + + for filename in ["questions.csv", "questions.json"]: + engine.session.inserted.clear() + with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: + result = question_importer.import_from_file(f) + assert result["success"] is True + assert result["imported_count"] == 1 + assert any("q1" in w for w in result["warnings"]) + assert engine.session.inserted == [ + { + "id": "q2", + "domanda": "New question?", + "risposta_attesa": "Answer2", + "categoria": "cat2", + } + ] + diff --git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py new file mode 100644 index 0000000..b8ec97e --- /dev/null +++ b/tests/test_question_set_controller.py @@ -0,0 +1,93 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import question_set_controller # noqa: E402 + + +def test_create_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_create = mocker.patch( + "controllers.question_set_controller.QuestionSet.create" + ) + mock_create.return_value = "sid" + + result = question_set_controller.create_set("name", ["q1"]) + + assert result == "sid" + mock_create.assert_called_once_with("name", ["q1"]) + mock_refresh.assert_called_once() + + +def test_update_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_update = 
mocker.patch( + "controllers.question_set_controller.QuestionSet.update" + ) + question_set_controller.update_set("sid", name="name", question_ids=["q1"]) + + mock_update.assert_called_once_with("sid", "name", ["q1"]) + mock_refresh.assert_called_once() + + +def test_delete_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_delete = mocker.patch( + "controllers.question_set_controller.QuestionSet.delete" + ) + question_set_controller.delete_set("sid") + + mock_delete.assert_called_once_with("sid") + mock_refresh.assert_called_once() + + +def test_prepare_sets_for_view(mocker): + mock_get_sets = mocker.patch( + "controllers.question_set_controller._get_question_sets" + ) + mock_get_questions = mocker.patch( + "controllers.question_set_controller._get_questions" + ) + questions_df = pd.DataFrame( + { + "id": ["1", "2"], + "domanda": ["d1", "d2"], + "risposta_attesa": ["a1", "a2"], + "categoria": ["A", "B"], + } + ) + sets_df = pd.DataFrame( + { + "id": ["s1", "s2"], + "name": ["set1", "set2"], + "questions": [["1"], ["2"]], + } + ) + + mock_get_questions.return_value = questions_df + mock_get_sets.return_value = sets_df + + result = question_set_controller.prepare_sets_for_view(["A"]) + + assert result["categories"] == ["A", "B"] + assert result["sets_df"]["id"].tolist() == ["s1"] + assert result["sets_df"].iloc[0]["questions_detail"] == [ + {"id": "1", "domanda": "d1", "categoria": "A"} + ] + + +def test_export_sets_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.question_set_controller.question_set_importer.export_to_file" + ) + dest = tmp_path / "sets.csv" + question_set_controller.export_sets_action(dest) + mock_export.assert_called_once_with(dest) diff --git a/tests/test_question_set_import.py b/tests/test_question_set_import.py new file mode 100644 index 0000000..8be591f --- /dev/null +++ b/tests/test_question_set_import.py @@ -0,0 +1,48 @@ +import os +import sys 
+ +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from models.question_set import QuestionSet + + +data_dir = os.path.join(os.path.dirname(__file__), "sample_data") + + +@pytest.mark.parametrize("filename", ["question_sets.json", "question_sets.csv"]) +def test_import_from_file_handles_duplicates(mocker, filename): + mock_refresh = mocker.patch( + "utils.cache.refresh_question_sets", return_value=pd.DataFrame() + ) + mock_load_questions = mocker.patch( + "controllers.question_controller.load_questions" + ) + mock_load_sets = mocker.patch( + "controllers.question_set_controller.load_sets" + ) + mock_create = mocker.patch("models.question_set.QuestionSet.create") + mock_add_question = mocker.patch( + "controllers.question_controller.add_question_if_not_exists" + ) + mock_load_questions.return_value = pd.DataFrame( + {"id": ["q1"], "domanda": ["Existing"], "risposta_attesa": ["A1"], "categoria": ["cat1"]} + ) + mock_load_sets.return_value = pd.DataFrame( + {"id": ["s1"], "name": ["Set1"], "questions": [[]]} + ) + mock_add_question.side_effect = lambda question_id, domanda, risposta_attesa, categoria: question_id == "q2" + + with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: + result = QuestionSet.import_from_file(f) + + assert result.sets_imported_count == 1 + assert result.new_questions_added_count == 1 + assert result.existing_questions_found_count == 1 + assert any("Set1" in w for w in result.warnings) + assert any("senza ID" in w for w in result.warnings) + mock_create.assert_called_once_with("Set2", ["q1", "q2"]) + assert mock_add_question.call_count == 1 + diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py new file mode 100644 index 0000000..fd24e6b --- /dev/null +++ b/tests/test_question_set_importer.py @@ -0,0 +1,133 @@ +import io +import json + +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import pandas as pd 
# noqa: E402 +import pytest # noqa: E402 + +from models.question_set import ( + QuestionSet, + PersistSetsResult, + question_set_importer, +) +from utils.file_reader_utils import read_question_sets + + +def test_read_question_sets_csv_missing_columns(): + csv_content = "name,id,domanda\nset1,1,Domanda" + file = io.StringIO(csv_content) + file.name = "test.csv" + with pytest.raises(ValueError): + read_question_sets(file) + + +def test_read_question_sets_json_not_list(): + data = {"name": "set1"} + file = io.BytesIO(json.dumps(data).encode("utf-8")) + file.name = "test.json" + with pytest.raises(ValueError): + read_question_sets(file) + + +def test_resolve_question_ids_adds_and_existing(mocker): + mock_add = mocker.patch( + "controllers.question_controller.add_question_if_not_exists" + ) + mock_add.return_value = True + current_questions = pd.DataFrame( + [{"id": "2", "domanda": "", "risposta_attesa": "", "categoria": ""}] + ) + questions = [ + {"id": "1", "domanda": "Q1", "risposta_attesa": "A1", "categoria": ""}, + {"id": "2"}, + ] + ( + ids, + updated_df, + new_added, + existing_found, + warnings, + ) = QuestionSet._resolve_question_ids(questions, current_questions) + assert ids == ["1", "2"] + assert new_added == 1 + assert existing_found == 1 + assert warnings == [] + assert "1" in updated_df["id"].values + mock_add.assert_called_once() + + +def test_resolve_question_ids_missing_id(): + current_questions = pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ) + questions = [{"domanda": "Q", "risposta_attesa": "A"}] + ( + ids, + updated_df, + new_added, + existing_found, + warnings, + ) = QuestionSet._resolve_question_ids(questions, current_questions) + assert ids == [] + assert new_added == 0 + assert existing_found == 0 + assert len(warnings) == 1 + assert updated_df.empty + + +def test_persist_sets_skips_duplicates(mocker): + mock_refresh = mocker.patch("utils.cache.refresh_question_sets") + mock_create = 
mocker.patch("models.question_set.QuestionSet.create") + mock_refresh.return_value = pd.DataFrame( + [{"id": "s1", "name": "Existing", "questions": []}] + ) + current_questions = pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ) + current_sets = pd.DataFrame( + [{"id": "s1", "name": "Existing", "questions": []}] + ) + sets_data = [ + {"name": "Existing", "questions": []}, + {"name": "New", "questions": []}, + ] + result = QuestionSet._persist_entities(sets_data, current_questions, current_sets) + assert result.sets_imported_count == 1 + assert result.new_questions_added_count == 0 + assert result.existing_questions_found_count == 0 + assert any("esiste già" in w for w in result.warnings) + mock_create.assert_called_once_with("New", []) + + +def test_question_set_importer_invokes_helpers(mocker): + mock_reader = mocker.patch( + "models.question_set.read_question_sets", + return_value=[{"name": "Sample", "questions": []}], + ) + mock_persist = mocker.patch( + "models.question_set.QuestionSet._persist_entities", + return_value="ok", + ) + current_questions = pd.DataFrame() + current_sets = pd.DataFrame() + mocker.patch( + "controllers.question_controller.load_questions", + return_value=current_questions, + ) + mocker.patch( + "controllers.question_set_controller.load_sets", + return_value=current_sets, + ) + + file = io.StringIO("[]") + file.name = "data.json" + result = question_set_importer.import_from_file(file) + + mock_reader.assert_called_once_with(file) + mock_persist.assert_called_once_with( + [{"name": "Sample", "questions": []}], current_questions, current_sets + ) + assert result == "ok" diff --git a/tests/test_register_page.py b/tests/test_register_page.py new file mode 100644 index 0000000..071fb14 --- /dev/null +++ b/tests/test_register_page.py @@ -0,0 +1,20 @@ +import pytest + +from views import register_page, page_registry + + +def test_register_page_prevents_duplicates(): + page_registry.clear() + + 
@register_page("Example") + def first(): + return "first" + + assert page_registry["Example"] is first + + with pytest.raises(ValueError): + @register_page("Example") + def second(): # pragma: no cover - funzione non registrata + return "second" + + assert page_registry["Example"] is first diff --git a/tests/test_result_controller.py b/tests/test_result_controller.py new file mode 100644 index 0000000..0ae0a2e --- /dev/null +++ b/tests/test_result_controller.py @@ -0,0 +1,92 @@ +import os +import sys + +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import result_controller as controller # noqa: E402 + + +def sample_data(): + results_df = pd.DataFrame( + [ + { + "id": "1", + "set_id": "10", + "timestamp": "2024-01-01", + "results": { + "generation_llm": "gpt-3.5", + "avg_score": 80, + "method": "LLM", + }, + }, + { + "id": "2", + "set_id": "20", + "timestamp": "2024-01-02", + "results": { + "generation_preset": "presetA", + "avg_score": 70, + "method": "LLM", + }, + }, + { + "id": "3", + "set_id": "10", + "timestamp": "2024-01-03", + "results": { + "generation_llm": "gpt-4", + "avg_score": 90, + "method": "LLM", + }, + }, + ] + ) + sets_df = pd.DataFrame( + [ + {"id": "10", "name": "Set1"}, + {"id": "20", "name": "Set2"}, + ] + ) + presets_df = pd.DataFrame( + [ + {"name": "presetA", "model": "gpt-3.5"}, + ] + ) + return results_df, sets_df, presets_df + + +def test_get_results_filters(mocker): + results_df, sets_df, presets_df = sample_data() + mocker.patch("controllers.result_controller.load_results", return_value=results_df) + mocker.patch("controllers.result_controller.load_sets", return_value=sets_df) + mocker.patch("controllers.result_controller.load_presets", return_value=presets_df) + + df_set = controller.get_results("Set1", None) + assert set(df_set["id"]) == {"1", "3"} + + df_model = controller.get_results(None, "gpt-3.5") + assert set(df_model["id"]) == {"1", "2"} + + +def test_list_names(mocker): 
+ results_df, sets_df, presets_df = sample_data() + mocker.patch("controllers.result_controller.load_presets", return_value=presets_df) + + set_names = controller.list_set_names(results_df, sets_df) + assert set_names == ["Set1", "Set2"] + + model_names = controller.list_model_names(results_df) + assert model_names == ["gpt-3.5", "gpt-4"] + + +def test_prepare_select_options(): + results_df, sets_df, _ = sample_data() + options = controller.prepare_select_options(results_df, sets_df) + expected = { + "3": "2024-01-03 - 🤖 Set1 (Avg: 90.00%) - LLM", + "2": "2024-01-02 - 🤖 Set2 (Avg: 70.00%) - LLM", + "1": "2024-01-01 - 🤖 Set1 (Avg: 80.00%) - LLM", + } + assert options == expected diff --git a/tests/test_session_state.py b/tests/test_session_state.py new file mode 100644 index 0000000..7bbcd49 --- /dev/null +++ b/tests/test_session_state.py @@ -0,0 +1,28 @@ +import pytest + +from views import session_state as ss + + +def test_initialize_session_state_writes_required_keys(monkeypatch): + fake_defaults = { + "questions": [], + "question_sets": [], + "results": [], + "api_key": "key", + "endpoint": "https://example.com", + "model": "gpt-4", + "temperature": 0.5, + "max_tokens": 1000, + } + monkeypatch.setattr(ss, "get_initial_state", lambda: fake_defaults) + monkeypatch.setattr(ss.st, "session_state", {}) + ss.initialize_session_state() + for key, value in fake_defaults.items(): + assert ss.st.session_state[key] == value + + +def test_ensure_keys_respects_existing(monkeypatch): + monkeypatch.setattr(ss.st, "session_state", {"existing": 1}) + ss.ensure_keys({"existing": 2, "missing": 3}) + assert ss.st.session_state["existing"] == 1 + assert ss.st.session_state["missing"] == 3 diff --git a/tests/test_set_helpers.py b/tests/test_set_helpers.py new file mode 100644 index 0000000..2463f12 --- /dev/null +++ b/tests/test_set_helpers.py @@ -0,0 +1,148 @@ +import os +import sys + +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from 
views.state_models import SetPageState +from models.question_set import PersistSetsResult + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.captured_warnings: list[str] = [] + + def warning(self, msg): + self.captured_warnings.append(msg) + + +def _setup(monkeypatch): + from views import set_helpers + + dummy_st = DummySt() + monkeypatch.setattr(set_helpers, "st", dummy_st) + return set_helpers, dummy_st + + +def test_save_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + sets_df = pd.DataFrame({"id": [1]}) + + def fake_update_set(_set_id, _name, _ids): + return sets_df, "ok" + + monkeypatch.setattr(set_helpers, "update_set", fake_update_set) + + state = SetPageState() + set_helpers.save_set_callback("1", "name", {}, [], state) + + assert state.save_set_success is True + assert state.save_set_success_message == "ok" + assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + + +def test_save_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + def fake_update_set(*_args, **_kwargs): + raise Exception("boom") + + monkeypatch.setattr(set_helpers, "update_set", fake_update_set) + + state = SetPageState() + set_helpers.save_set_callback("1", "name", {}, [], state) + + assert state.save_set_error is True + assert state.save_set_error_message == "boom" + assert "question_sets" not in dummy_st.session_state + + +def test_delete_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + sets_df = pd.DataFrame({"id": [1]}) + + def fake_delete_set(_set_id): + return sets_df, "deleted" + + monkeypatch.setattr(set_helpers, "delete_set", fake_delete_set) + + state = SetPageState() + set_helpers.delete_set_callback("1", state) + + assert state.delete_set_success is True + assert 
state.delete_set_success_message == "deleted" + assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + + +def test_delete_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + def fake_delete_set(_set_id): + raise Exception("fail") + + monkeypatch.setattr(set_helpers, "delete_set", fake_delete_set) + + state = SetPageState() + set_helpers.delete_set_callback("1", state) + + assert state.save_set_error is True + assert state.save_set_error_message == "fail" + assert "question_sets" not in dummy_st.session_state + + +def test_import_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + dummy_st.session_state.uploaded_file_content_set = object() + + result = PersistSetsResult( + sets_df=pd.DataFrame({"id": [1]}), + questions_df=pd.DataFrame({"id": [2]}), + sets_imported_count=1, + new_questions_added_count=0, + existing_questions_found_count=0, + warnings=["warn"], + ) + + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _f: result) + + state = SetPageState() + set_helpers.import_set_callback(state) + + assert state.import_set_success is True + assert state.import_set_success_message == "1 set importati." 
+ assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + assert isinstance(dummy_st.session_state.questions, pd.DataFrame) + assert dummy_st.captured_warnings == ["warn"] + assert dummy_st.session_state.uploaded_file_content_set is None + + +def test_import_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + dummy_st.session_state.uploaded_file_content_set = object() + dummy_st.session_state.upload_set_file = object() + + def fake_import_from_file(_f): + raise Exception("bad") + + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", fake_import_from_file) + + state = SetPageState() + set_helpers.import_set_callback(state) + + assert state.import_set_error is True + assert state.import_set_error_message == "bad" + assert dummy_st.session_state.uploaded_file_content_set is None + assert "upload_set_file" not in dummy_st.session_state + diff --git a/tests/test_startup_controller.py b/tests/test_startup_controller.py new file mode 100644 index 0000000..e18cc3b --- /dev/null +++ b/tests/test_startup_controller.py @@ -0,0 +1,54 @@ +import os +import sys + +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import startup_controller as controller # noqa: E402 + + +def test_get_initial_state(monkeypatch): + calls = [] + + def mock_initialize_database(): + calls.append("init") + + def mock_load_default_config(): + calls.append("defaults") + return {"conf": "value"} + + def mock_get_questions(): + calls.append("questions") + return ["q1"] + + def mock_get_question_sets(): + calls.append("question_sets") + return ["qs1"] + + def mock_get_results(): + calls.append("results") + return ["r1"] + + monkeypatch.setattr(controller, "initialize_database", mock_initialize_database) + monkeypatch.setattr(controller, "load_default_config", mock_load_default_config) + monkeypatch.setattr(controller, "get_questions", mock_get_questions) + monkeypatch.setattr(controller, 
"get_question_sets", mock_get_question_sets) + monkeypatch.setattr(controller, "get_results", mock_get_results) + + state = controller.get_initial_state() + + assert state == { + "questions": ["q1"], + "question_sets": ["qs1"], + "results": ["r1"], + "conf": "value", + } + + assert calls == [ + "init", + "defaults", + "questions", + "question_sets", + "results", + ] diff --git a/tests/test_startup_utils.py b/tests/test_startup_utils.py new file mode 100644 index 0000000..993460b --- /dev/null +++ b/tests/test_startup_utils.py @@ -0,0 +1,39 @@ +import logging + +from utils.startup_utils import setup_logging, initialize_database, load_default_config +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + + +def test_setup_logging_creates_file(tmp_path): + log_file = tmp_path / "app.log" + root_logger = logging.getLogger() + root_logger.handlers.clear() + setup_logging(log_file=log_file) + logging.getLogger().info("hello") + assert log_file.exists() + assert "hello" in log_file.read_text() + + +def test_initialize_database_calls_init_db(monkeypatch, mocker): + dummy_engine = mocker.MagicMock() + monkeypatch.setattr( + initialize_database.__globals__["DatabaseEngine"], + "instance", + classmethod(lambda cls: dummy_engine), + ) + initialize_database() + dummy_engine.init_db.assert_called_once() + + +def test_load_default_config_returns_expected(monkeypatch, tmp_path): + db_cfg = tmp_path / "db.config" + db_cfg.write_text("[mysql]\nuser=u\npassword=p\nhost=h\ndatabase=d\n") + monkeypatch.setenv("OPENAI_API_KEY", "key") + config = load_default_config() + assert config == { + "api_key": "key", + "endpoint": DEFAULT_ENDPOINT, + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000, + } diff --git a/tests/test_state_models.py b/tests/test_state_models.py new file mode 100644 index 0000000..72b9580 --- /dev/null +++ b/tests/test_state_models.py @@ -0,0 +1,21 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from 
views.state_models import SetPageState, QuestionPageState + + +def test_set_page_state_defaults_and_mutability(): + state = SetPageState() + assert state.save_set_success is False + assert state.save_set_success_message == 'Set aggiornato con successo!' + state.save_set_success = True + assert state.save_set_success is True + + +def test_question_page_state_defaults_and_mutability(): + state = QuestionPageState() + assert state.save_success is False + assert state.delete_success_message == 'Domanda eliminata con successo!' + state.save_success = True + assert state.save_success is True diff --git a/tests/test_statistics.py b/tests/test_statistics.py new file mode 100644 index 0000000..6ec394b --- /dev/null +++ b/tests/test_statistics.py @@ -0,0 +1,43 @@ +import os +import sys +import pytest + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from controllers import calculate_statistics # noqa: E402 + + +def test_calculate_statistics(): + results = { + "q1": { + "question": "Domanda 1", + "evaluation": { + "score": 80, + "similarity": 70, + "correctness": 90, + "completeness": 60, + }, + }, + "q2": { + "question": "Domanda 2", + "evaluation": { + "score": 60, + "similarity": 50, + "correctness": 40, + "completeness": 80, + }, + }, + } + stats = calculate_statistics(results) + assert stats["avg_score"] == pytest.approx(70.0) + assert len(stats["per_question_scores"]) == 2 + assert {"question": "Domanda 1", "score": 80} in stats["per_question_scores"] + assert stats["radar_metrics"]["similarity"] == pytest.approx(60.0) + assert stats["radar_metrics"]["correctness"] == pytest.approx(65.0) + assert stats["radar_metrics"]["completeness"] == pytest.approx(70.0) + + +def test_calculate_statistics_empty(): + stats = calculate_statistics({}) + assert stats["avg_score"] == 0 + assert stats["per_question_scores"] == [] + assert stats["radar_metrics"] == {"similarity": 0, "correctness": 0, "completeness": 0} diff --git a/tests/test_style_utils.py 
b/tests/test_style_utils.py new file mode 100644 index 0000000..7f127ec --- /dev/null +++ b/tests/test_style_utils.py @@ -0,0 +1,56 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import style_utils + + +class DummySt: + def __init__(self): + self.calls = [] + + def markdown(self, text, **kwargs): + self.calls.append(text) + + +def test_add_global_styles_injects_css(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_global_styles() + + assert any('stTextInput' in c for c in dummy_st.calls) + + +def test_add_page_header_calls_global_styles_and_renders(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + called = {'css': False} + + def fake_load_css(): + called['css'] = True + + monkeypatch.setattr(style_utils, 'load_css', fake_load_css) + + style_utils.add_page_header('Titolo', icon='✨', description='desc') + + assert called['css'] is True + assert any('✨ Titolo' in c and 'desc' in c for c in dummy_st.calls) + + +def test_add_section_title_renders_text(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_section_title('Section', icon='➡') + + assert any('➡ Section' in c for c in dummy_st.calls) + + +def test_add_home_styles_injects_css(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_home_styles() + + assert any('feature-box' in c for c in dummy_st.calls) diff --git a/tests/test_test_controller.py b/tests/test_test_controller.py new file mode 100644 index 0000000..95fae57 --- /dev/null +++ b/tests/test_test_controller.py @@ -0,0 +1,104 @@ +import os +import sys +from types import SimpleNamespace + +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers.test_controller import import_results_action, run_test + + +def 
test_import_results_action_no_file(mocker): + mock_import = mocker.patch( + "controllers.test_controller.test_result_importer.import_from_file" + ) + mock_load_results = mocker.patch("controllers.test_controller.load_results") + with pytest.raises(ValueError, match="Nessun file caricato"): + import_results_action(None) + mock_import.assert_not_called() + mock_load_results.assert_not_called() + + +def test_import_results_action_failure(mocker): + mock_import = mocker.patch( + "controllers.test_controller.test_result_importer.import_from_file" + ) + mock_load_results = mocker.patch("controllers.test_controller.load_results") + mock_import.side_effect = ValueError("errore") + with pytest.raises(ValueError, match="errore"): + import_results_action("dummy") + mock_load_results.assert_not_called() + + +def test_run_test_success(mocker): + mock_load_all = mocker.patch("controllers.test_controller.Question.load_all") + mock_gen = mocker.patch("controllers.test_controller.generate_answer") + mock_eval = mocker.patch("controllers.test_controller.evaluate_answer") + mock_add_refresh = mocker.patch( + "controllers.test_controller.TestResult.add_and_refresh", return_value="rid" + ) + mock_load_df = mocker.patch( + "controllers.test_controller.TestResult.load_all_df", + return_value=pd.DataFrame(), + ) + mock_load_all.return_value = [SimpleNamespace(id="1", domanda="Q", risposta_attesa="A")] + mock_gen.return_value = "Ans" + mock_eval.return_value = { + "score": 50, + "explanation": "ok", + "similarity": 50, + "correctness": 50, + "completeness": 50, + } + + res = run_test("set1", "name", ["1"], {}, {}) + + assert res["result_id"] == "rid" + assert res["avg_score"] == 50 + assert isinstance(res["results_df"], pd.DataFrame) + assert res["results"]["1"]["actual_answer"] == "Ans" + + +def test_run_test_generation_and_evaluation_errors(mocker): + mock_load_all = mocker.patch("controllers.test_controller.Question.load_all") + mock_gen = 
mocker.patch("controllers.test_controller.generate_answer") + mock_eval = mocker.patch("controllers.test_controller.evaluate_answer") + mock_add_refresh = mocker.patch( + "controllers.test_controller.TestResult.add_and_refresh", return_value="rid" + ) + mock_load_df = mocker.patch( + "controllers.test_controller.TestResult.load_all_df", + return_value=pd.DataFrame(), + ) + questions = [ + SimpleNamespace(id="1", domanda="Q1", risposta_attesa="A1"), + SimpleNamespace(id="2", domanda="Q2", risposta_attesa="A2"), + ] + mock_load_all.return_value = questions + mock_gen.side_effect = [Exception("gen fail"), "ans2"] + mock_eval.side_effect = [Exception("eval fail")] + + res = run_test("set1", "name", ["1", "2"], {}, {}) + + assert res["result_id"] == "rid" + assert res["avg_score"] == 0 + q1 = res["results"]["1"] + q2 = res["results"]["2"] + assert q1["actual_answer"] == "gen fail" + assert q1["evaluation"]["score"] == 0 + assert q2["actual_answer"] == "ans2" + assert q2["evaluation"]["score"] == 0 + assert isinstance(res["results_df"], pd.DataFrame) + + +def test_export_results_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.test_controller.test_result_importer.export_to_file" + ) + dest = tmp_path / "results.json" + from controllers.test_controller import export_results_action + + export_results_action(dest) + mock_export.assert_called_once_with(dest) diff --git a/tests/test_ui_utils.py b/tests/test_ui_utils.py new file mode 100644 index 0000000..6b3e045 --- /dev/null +++ b/tests/test_ui_utils.py @@ -0,0 +1,21 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import ui_utils, style_utils, component_utils + + +def test_ui_utils_re_exports_functions(): + assert ui_utils.add_global_styles is style_utils.add_global_styles + assert ui_utils.add_page_header is style_utils.add_page_header + assert ui_utils.add_section_title is style_utils.add_section_title + assert ui_utils.create_card is 
component_utils.create_card + assert ui_utils.create_metrics_container is component_utils.create_metrics_container + expected_all = { + 'add_global_styles', + 'add_page_header', + 'add_section_title', + 'create_card', + 'create_metrics_container', + } + assert set(ui_utils.__all__) == expected_all diff --git a/tests/test_visualizza_risultati_view.py b/tests/test_visualizza_risultati_view.py new file mode 100644 index 0000000..8d217d9 --- /dev/null +++ b/tests/test_visualizza_risultati_view.py @@ -0,0 +1,191 @@ +import os +import sys +import importlib +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + + +@pytest.fixture +def visualizza_risultati(monkeypatch): + import controllers + import importlib.util + import json + from pathlib import Path + + monkeypatch.setattr( + controllers, + "get_results", + lambda *_a, **_k: pd.DataFrame( + [{"id": 1, "set_id": 1, "timestamp": "t", "results": {}}] + ), + ) + monkeypatch.setattr( + controllers, "load_sets", lambda: pd.DataFrame([{ "id": 1, "name": "s" }]) + ) + monkeypatch.setattr(controllers, "list_set_names", lambda *_a: ["s"]) + monkeypatch.setattr(controllers, "list_model_names", lambda *_a: ["m"]) + monkeypatch.setattr( + controllers, "prepare_select_options", lambda df, sets: {1: "r"} + ) + monkeypatch.setattr(json, "dumps", lambda *a, **k: "{}") + + base_path = Path(__file__).resolve().parents[1] / "views" + style_spec = importlib.util.spec_from_file_location("views.style_utils", base_path / "style_utils.py") + style_mod = importlib.util.module_from_spec(style_spec) + sys.modules["views.style_utils"] = style_mod + style_spec.loader.exec_module(style_mod) + + module_path = base_path / "visualizza_risultati.py" + spec = importlib.util.spec_from_file_location("views.visualizza_risultati", module_path) + module = importlib.util.module_from_spec(spec) + sys.modules["views.visualizza_risultati"] = module + spec.loader.exec_module(module) + return module + + +class 
DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummyContext: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.captured_callbacks = {} + + def success(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def write(self, *args, **kwargs): + pass + + def info(self, *args, **kwargs): + pass + + def markdown(self, *args, **kwargs): + pass + + def selectbox(self, label, options, index=0, **kwargs): + return options[index] + + def text_input(self, label, value="", **kwargs): + return value + + def button(self, label, on_click=None, **kwargs): + if on_click: + self.captured_callbacks[label] = on_click + return False + + def file_uploader(self, *args, **kwargs): + return None + + def download_button(self, *args, **kwargs): + pass + + def expander(self, *args, **kwargs): + return DummyContext() + + def columns(self, n): + return (DummyContext(), DummyContext()) + + def stop(self): + pass + + +class StopRender(Exception): + pass + + +def _setup(monkeypatch, visualizza_risultati): + dummy_st = DummySt() + monkeypatch.setattr(visualizza_risultati, "st", dummy_st) + monkeypatch.setattr(visualizza_risultati, "add_page_header", lambda *a, **k: None) + monkeypatch.setattr(visualizza_risultati.json, "dumps", lambda *a, **k: "{}") + + res_df = pd.DataFrame([ + {"id": 1, "set_id": 1, "timestamp": "t", "results": {"method": "LLM"}} + ]) + sets_df = pd.DataFrame([{"id": 1, "name": "s"}]) + + monkeypatch.setattr(visualizza_risultati, "get_results", lambda *_a, **_k: res_df) + monkeypatch.setattr(visualizza_risultati, "load_sets", lambda: sets_df) + monkeypatch.setattr(visualizza_risultati, "list_set_names", lambda *_a: ["s"]) + monkeypatch.setattr(visualizza_risultati, 
"list_model_names", lambda *_a: ["m"]) + monkeypatch.setattr( + visualizza_risultati, "prepare_select_options", lambda df, sets: {1: "r"} + ) + + def fake_add_section_title(*args, **kwargs): + raise StopRender() + + monkeypatch.setattr(visualizza_risultati, "add_section_title", fake_add_section_title) + + try: + visualizza_risultati.render() + except StopRender: + pass + + return dummy_st + + +def test_import_results_callback_success(monkeypatch, visualizza_risultati): + dummy_st = _setup(monkeypatch, visualizza_risultati) + callback = dummy_st.captured_callbacks.get("Importa Risultati") + assert callback is not None + + res_df = pd.DataFrame([{"id": 2, "set_id": 1, "timestamp": "t2", "results": {}}]) + + def fake_import_results_action(_file): + return res_df, "ok" + + monkeypatch.setattr(visualizza_risultati, "import_results_action", fake_import_results_action) + + dummy_st.session_state.uploaded_results_file = object() + callback() + + assert dummy_st.session_state.import_results_success is True + assert dummy_st.session_state.import_results_message == "ok" + assert dummy_st.session_state.import_results_error is False + assert dummy_st.session_state.uploaded_results_file is None + assert dummy_st.session_state.upload_results is None + assert isinstance(dummy_st.session_state.results, pd.DataFrame) + + +def test_import_results_callback_error(monkeypatch, visualizza_risultati): + dummy_st = _setup(monkeypatch, visualizza_risultati) + callback = dummy_st.captured_callbacks.get("Importa Risultati") + assert callback is not None + + def fake_import_results_action(_file): + raise Exception("fail") + + monkeypatch.setattr(visualizza_risultati, "import_results_action", fake_import_results_action) + + dummy_st.session_state.uploaded_results_file = object() + callback() + + assert dummy_st.session_state.import_results_error is True + assert dummy_st.session_state.import_results_message == "fail" + assert dummy_st.session_state.import_results_success is False + assert 
dummy_st.session_state.uploaded_results_file is None + assert dummy_st.session_state.upload_results is None + diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/__pycache__/__init__.cpython-311.pyc b/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..2221480 Binary files /dev/null and b/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/utils/__pycache__/cache.cpython-311.pyc b/utils/__pycache__/cache.cpython-311.pyc new file mode 100644 index 0000000..63b5e07 Binary files /dev/null and b/utils/__pycache__/cache.cpython-311.pyc differ diff --git a/utils/__pycache__/data_format_utils.cpython-311.pyc b/utils/__pycache__/data_format_utils.cpython-311.pyc new file mode 100644 index 0000000..cb94050 Binary files /dev/null and b/utils/__pycache__/data_format_utils.cpython-311.pyc differ diff --git a/utils/__pycache__/export_template.cpython-311.pyc b/utils/__pycache__/export_template.cpython-311.pyc new file mode 100644 index 0000000..54d5056 Binary files /dev/null and b/utils/__pycache__/export_template.cpython-311.pyc differ diff --git a/utils/__pycache__/file_reader_utils.cpython-311.pyc b/utils/__pycache__/file_reader_utils.cpython-311.pyc new file mode 100644 index 0000000..5531ef7 Binary files /dev/null and b/utils/__pycache__/file_reader_utils.cpython-311.pyc differ diff --git a/utils/__pycache__/import_template.cpython-311.pyc b/utils/__pycache__/import_template.cpython-311.pyc new file mode 100644 index 0000000..9e4e1d5 Binary files /dev/null and b/utils/__pycache__/import_template.cpython-311.pyc differ diff --git a/utils/__pycache__/openai_client.cpython-311.pyc b/utils/__pycache__/openai_client.cpython-311.pyc new file mode 100644 index 0000000..a7d8270 Binary files /dev/null and b/utils/__pycache__/openai_client.cpython-311.pyc differ diff --git a/utils/__pycache__/startup_utils.cpython-311.pyc b/utils/__pycache__/startup_utils.cpython-311.pyc new file mode 
100644 index 0000000..6d838a2 Binary files /dev/null and b/utils/__pycache__/startup_utils.cpython-311.pyc differ diff --git a/utils/cache.py b/utils/cache.py new file mode 100644 index 0000000..34b4356 --- /dev/null +++ b/utils/cache.py @@ -0,0 +1,61 @@ +from functools import lru_cache +from dataclasses import asdict +import pandas as pd + +from models.question import Question +from models.question_set import QuestionSet +from models.api_preset import APIPreset +from models.test_result import TestResult + + +@lru_cache(maxsize=1) +def get_questions() -> pd.DataFrame: + data = [asdict(q) for q in Question.load_all()] + columns = ["id", "domanda", "risposta_attesa", "categoria"] + return pd.DataFrame(data, columns=columns) + + +def refresh_questions() -> pd.DataFrame: + get_questions.cache_clear() + return get_questions() + + +@lru_cache(maxsize=1) +def get_question_sets() -> pd.DataFrame: + data = [asdict(s) for s in QuestionSet.load_all()] + columns = ["id", "name", "questions"] + return pd.DataFrame(data, columns=columns) + + +def refresh_question_sets() -> pd.DataFrame: + get_question_sets.cache_clear() + return get_question_sets() + + +@lru_cache(maxsize=1) +def get_api_presets() -> pd.DataFrame: + data = [asdict(p) for p in APIPreset.load_all()] + columns = [ + "id", + "name", + "provider_name", + "endpoint", + "api_key", + "model", + "temperature", + "max_tokens", + ] + return pd.DataFrame(data, columns=columns) + + +def refresh_api_presets() -> pd.DataFrame: + get_api_presets.cache_clear() + return get_api_presets() + + +def get_results() -> pd.DataFrame: + return TestResult.load_all_df() + + +def refresh_results() -> pd.DataFrame: + return TestResult.refresh_cache() diff --git a/utils/data_format_utils.py b/utils/data_format_utils.py new file mode 100644 index 0000000..0d2023b --- /dev/null +++ b/utils/data_format_utils.py @@ -0,0 +1,58 @@ +import pandas as pd +from typing import Any, Dict, List, Tuple + + +def format_questions_for_view( + questions_df: 
pd.DataFrame, +) -> Tuple[pd.DataFrame, Dict[str, Dict[str, str]], List[str]]: + """Normalizza il DataFrame delle domande per la visualizzazione. + + Garantisce che la colonna ``categoria`` esista e sia riempita con ``N/A`` quando + mancante. Restituisce il DataFrame normalizzato, una mappa degli ID delle domande + ai rispettivi testi e categorie e l'elenco ordinato delle categorie. + """ + if questions_df is None or questions_df.empty: + df = pd.DataFrame(columns=["id", "domanda", "risposta_attesa", "categoria"]) + categories: List[str] = [] + else: + df = questions_df.copy() + if "categoria" not in df.columns: + df["categoria"] = "N/A" + else: + df["categoria"] = df["categoria"].fillna("N/A") + categories = sorted(list(df["categoria"].astype(str).unique())) + + question_map: Dict[str, Dict[str, str]] = { + str(row.get("id", "")): { + "domanda": row.get("domanda", ""), + "categoria": row.get("categoria", "N/A"), + } + for _, row in df.iterrows() + } + + return df, question_map, categories + + +def build_questions_detail( + question_map: Dict[str, Dict[str, str]], + q_ids: Any, +) -> List[Dict[str, str]]: + """Restituisce i dettagli delle domande per ``q_ids`` usando ``question_map``. + + Ogni elemento della lista restituita contiene ``id``, ``domanda`` e ``categoria`` + della domanda. Gli ID non corrispondenti producono testo vuoto con categoria + ``N/A``. Se ``q_ids`` non è una lista, viene restituita una lista vuota. 
+ """ + + details: List[Dict[str, str]] = [] + if isinstance(q_ids, list): + for q_id in q_ids: + info = question_map.get(str(q_id), {}) + details.append( + { + "id": str(q_id), + "domanda": info.get("domanda", ""), + "categoria": info.get("categoria", "N/A"), + } + ) + return details diff --git a/utils/export_template.py b/utils/export_template.py new file mode 100644 index 0000000..a887314 --- /dev/null +++ b/utils/export_template.py @@ -0,0 +1,34 @@ +"""Template base per l'esportazione di dati su file.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, IO, Union, final + + +class ExportTemplate(ABC): + """Classe base astratta per implementare l'esportazione su file.""" + + @abstractmethod + def gather_data(self) -> Any: + """Raccoglie i dati correnti da esportare.""" + pass + + @final + def export_to_file(self, destination: Union[str, IO[Any]]) -> None: + """Esporta i dati raccolti su ``destination``. + + Nota: le sottoclassi non devono sovrascrivere questo metodo. + + Parameters + ---------- + destination: Union[str, IO[Any]] + Percorso del file di destinazione oppure file aperto in scrittura. 
+ """ + from utils.file_writer_utils import write_dataset + + data = self.gather_data() + write_dataset(data, destination) + + +__all__ = ["ExportTemplate"] diff --git a/utils/file_reader_utils.py b/utils/file_reader_utils.py new file mode 100644 index 0000000..f3a405a --- /dev/null +++ b/utils/file_reader_utils.py @@ -0,0 +1,218 @@ +import os +import json +import uuid +from datetime import datetime +from typing import IO, Any, Dict, Iterable, List, Tuple + +import pandas as pd + +__all__ = [ + "read_questions", + "read_question_sets", + "read_test_results", + "filter_new_rows", +] + +REQUIRED_QUESTION_COLUMNS = ["domanda", "risposta_attesa"] +REQUIRED_SET_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] +REQUIRED_RESULT_COLUMNS = ["id", "set_id", "timestamp", "results"] + + +def filter_new_rows(df: pd.DataFrame, existing_ids: Iterable[str]) -> Tuple[pd.DataFrame, int]: + """Ritorna le righe di ``df`` il cui ``id`` non è in ``existing_ids``. + + Parametri + --------- + df: + DataFrame contenente una colonna ``id``. + existing_ids: + Insieme di identificatori già presenti nel database. + + Restituisce + ----------- + Tuple[pd.DataFrame, int] + Il DataFrame filtrato con sole righe nuove e il conteggio delle nuove righe. 
+ """ + + if df is None or df.empty: + return df, 0 + + existing_set = {str(eid) for eid in existing_ids} + mask = ~df["id"].astype(str).isin(existing_set) + filtered = df[mask].copy() + return filtered, int(mask.sum()) + + +def read_questions(file: IO[str] | IO[bytes]) -> pd.DataFrame: + """Legge un file di domande (CSV o JSON) e restituisce un DataFrame normalizzato.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + elif file_extension == ".json": + try: + data = json.load(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file json non è valido") from e + if isinstance(data, list): + df = pd.DataFrame(data) + elif isinstance(data, dict) and isinstance(data.get("questions"), list): + df = pd.DataFrame(data["questions"]) + else: + raise ValueError( + "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." + ) + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. Caricare un file CSV o JSON.") + + if df is None or df.empty: + raise ValueError("Il file importato è vuoto o non contiene dati validi.") + + if "question" in df.columns and "domanda" not in df.columns: + df.rename(columns={"question": "domanda"}, inplace=True) + if "expected_answer" in df.columns and "risposta_attesa" not in df.columns: + df.rename(columns={"expected_answer": "risposta_attesa"}, inplace=True) + + if not all(col in df.columns for col in REQUIRED_QUESTION_COLUMNS): + raise ValueError( + f"Il file importato deve contenere le colonne '{REQUIRED_QUESTION_COLUMNS[0]}' e '{REQUIRED_QUESTION_COLUMNS[1]}'." 
+ ) + + if "id" not in df.columns: + df["id"] = [str(uuid.uuid4()) for _ in range(len(df))] + else: + df["id"] = df["id"].astype(str) + + if "categoria" not in df.columns: + df["categoria"] = "" + else: + df["categoria"] = df["categoria"].astype(str).fillna("") + + df["domanda"] = df["domanda"].astype(str).fillna("") + df["risposta_attesa"] = df["risposta_attesa"].astype(str).fillna("") + + return df[["id", "domanda", "risposta_attesa", "categoria"]] + + +def read_question_sets(file: IO[str] | IO[bytes]) -> List[Dict[str, Any]]: + """Legge un file di set di domande (CSV o JSON) e restituisce una lista di dizionari.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + + missing = [c for c in REQUIRED_SET_COLUMNS if c not in df.columns] + if missing: + raise ValueError( + "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_SET_COLUMNS) + ) + + sets_dict: Dict[str, List[Dict[str, str]]] = {} + for _, row in df.iterrows(): + name = str(row["name"]).strip() + if not name: + continue + question = { + "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", + "domanda": str(row["domanda"]).strip() + if not pd.isna(row["domanda"]) + else "", + "risposta_attesa": str(row["risposta_attesa"]).strip() + if not pd.isna(row["risposta_attesa"]) + else "", + "categoria": str(row["categoria"]).strip() + if not pd.isna(row["categoria"]) + else "", + } + sets_dict.setdefault(name, []).append(question) + return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] + + elif file_extension == ".json": + try: + content = file.read() + if isinstance(content, bytes): + content = content.decode("utf-8") + data = json.loads(content) + except Exception as e: # pragma: no cover - handled via ValueError + raise 
ValueError("Il formato del file json non è valido") from e + + if not isinstance(data, list): + raise ValueError("Il formato del file json non è valido") + return data + + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. Caricare un file CSV o JSON.") + + +def read_test_results(file: IO[str] | IO[bytes]) -> pd.DataFrame: + """Legge un file di risultati di test (CSV o JSON) e restituisce un DataFrame normalizzato.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + elif file_extension == ".json": + try: + data = json.load(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file json non è valido") from e + if isinstance(data, dict): + data = [data] + if not isinstance(data, list): + raise ValueError( + "Il file JSON deve contenere un oggetto o una lista di risultati." + ) + df = pd.DataFrame(data) + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. 
Caricare un file CSV o JSON.") + + if df is None or df.empty: + raise ValueError("Il file importato è vuoto o non contiene dati validi.") + + if "id" not in df.columns: + df["id"] = [str(uuid.uuid4()) for _ in range(len(df))] + else: + df["id"] = df["id"].astype(str) + + if "set_id" not in df.columns: + df["set_id"] = "" + else: + df["set_id"] = df["set_id"].astype(str).fillna("") + + if "timestamp" not in df.columns: + df["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + else: + df["timestamp"] = df["timestamp"].astype(str).fillna( + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ) + + def _parse_results(value: Any) -> Dict: + if isinstance(value, str): + try: + return json.loads(value) + except Exception: # pragma: no cover - invalid json handled as empty dict + return {} + if isinstance(value, dict): + return value + return {} + + if "results" not in df.columns: + df["results"] = [{} for _ in range(len(df))] + else: + df["results"] = df["results"].apply(_parse_results) + + return df[["id", "set_id", "timestamp", "results"]] diff --git a/utils/file_writer_utils.py b/utils/file_writer_utils.py new file mode 100644 index 0000000..cb8606b --- /dev/null +++ b/utils/file_writer_utils.py @@ -0,0 +1,51 @@ +"""Utility per la serializzazione di dataset in CSV o JSON.""" + +from __future__ import annotations + +import json +import os +from typing import Any, IO, Union + +import pandas as pd + +__all__ = ["write_dataset"] + + +def _ensure_dataframe(data: Any) -> pd.DataFrame: + """Converte ``data`` in ``DataFrame`` se possibile.""" + if isinstance(data, pd.DataFrame): + return data + return pd.DataFrame(data) + + +def write_dataset(data: Any, destination: Union[str, IO[str]]) -> None: + """Scrive ``data`` su ``destination`` in formato CSV o JSON. + + Il formato viene determinato dall'estensione del file. + ``destination`` può essere un percorso o un file aperto in scrittura. 
+ """ + close_after = False + if isinstance(destination, (str, os.PathLike)): + file_path = os.fspath(destination) + ext = os.path.splitext(file_path)[1].lower() + f: IO[str] = open(file_path, "w", encoding="utf-8", newline="") + close_after = True + else: + f = destination + name = getattr(f, "name", "") + ext = os.path.splitext(name)[1].lower() + + if ext == ".csv": + df = _ensure_dataframe(data) + df.to_csv(f, index=False) + elif ext == ".json": + if isinstance(data, pd.DataFrame): + payload = data.to_dict(orient="records") + else: + payload = data + json.dump(payload, f, ensure_ascii=False, indent=2) + else: + raise ValueError("Formato file non supportato. Usare estensione .csv o .json") + + if close_after: + f.close() diff --git a/utils/import_template.py b/utils/import_template.py new file mode 100644 index 0000000..8c7054a --- /dev/null +++ b/utils/import_template.py @@ -0,0 +1,72 @@ +"""Template base per l'importazione di dati da file.""" + +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Any, IO, final + +logger = logging.getLogger(__name__) + + +class ImportTemplate(ABC): + """Classe base astratta per implementare l'importazione da file. + + Le sottoclassi devono implementare ``parse_file`` e ``persist_data`` mentre + questo template gestisce il flusso dell'operazione e l'handling degli errori. + """ + + @final + def import_from_file(self, file: IO[Any]) -> Any: + """Esegue l'importazione da un file. + + Questo metodo fornisce il flusso standard dell'importazione e **non deve + essere sovrascritto** dalle sottoclassi. + + Il flusso standard prevede il parsing tramite :meth:`parse_file` seguito + dalla persistenza dei dati con :meth:`persist_data`. + + Parameters + ---------- + file: IO[Any] + File aperto da cui leggere i dati. Non viene chiuso. + + Returns + ------- + Any + Il valore restituito da :meth:`persist_data`. 
+ + Raises + ------ + ValueError + Se si verifica un errore durante l'importazione. L'eccezione + originale viene loggata e incapsulata in un ``ValueError``. + """ + try: + logger.debug( + "Avvio importazione dal file %s", getattr(file, "name", "") + ) + parsed = self.parse_file(file) + logger.debug("Parsing completato: %s", parsed) + result = self.persist_data(parsed) + logger.info( + "Importazione completata con successo dal file %s", + getattr(file, "name", ""), + ) + return result + except Exception as exc: # noqa: BLE001 + logger.exception("Errore durante l'importazione: %s", exc) + raise ValueError("Errore durante l'importazione") from exc + + @abstractmethod + def parse_file(self, file: IO[Any]) -> Any: + """Legge e interpreta il contenuto di ``file``.""" + pass + + @abstractmethod + def persist_data(self, parsed: Any) -> Any: + """Persiste i dati parsati nel database o in altra destinazione.""" + pass + + +__all__ = ["ImportTemplate"] diff --git a/utils/openai_client.py b/utils/openai_client.py new file mode 100644 index 0000000..074b5a2 --- /dev/null +++ b/utils/openai_client.py @@ -0,0 +1,107 @@ +"""Utility per interagire con le API dei provider LLM.""" + +import logging +from typing import Any, List + +from openai import OpenAI + +logger = logging.getLogger(__name__) + +DEFAULT_MODEL: str = "gpt-4o" +DEFAULT_ENDPOINT: str = "https://api.openai.com/v1" + + +class ClientCreationError(Exception): + """Eccezione sollevata quando la creazione del client OpenAI fallisce.""" + + +def get_openai_client(api_key: str, base_url: str | None = None) -> OpenAI: + """Crea e restituisce un client OpenAI configurato. + + Solleva ``ClientCreationError`` se la chiave API è mancante o la creazione fallisce. 
+ """ + + if not api_key: + logger.warning("Tentativo di creare client OpenAI senza chiave API.") + raise ClientCreationError("Chiave API mancante") + try: + effective_base_url = ( + base_url + if base_url and base_url.strip() and base_url != "custom" + else DEFAULT_ENDPOINT + ) + return OpenAI(api_key=api_key, base_url=effective_base_url) + except Exception as exc: # noqa: BLE001 + logger.error(f"Errore durante la creazione del client OpenAI: {exc}") + raise ClientCreationError(str(exc)) from exc + + +def get_available_models_for_endpoint( + provider_name: str, + endpoint_url: str | None = None, + api_key: str | None = None, +) -> List[str]: + """Restituisce una lista di modelli disponibili basata sul provider o sull'endpoint.""" + # Aggiungi altri provider predefiniti qui + # elif provider_name == "XAI": + # return XAI_MODELS + if provider_name == "Personalizzato": + if ( + not api_key + or not endpoint_url + or endpoint_url == "custom" + or not endpoint_url.strip() + ): + return [ + "(Endpoint personalizzato non specificato)", + DEFAULT_MODEL, + "gpt-4", + "gpt-3.5-turbo", + ] + + try: + client = get_openai_client(api_key=api_key, base_url=endpoint_url) + except ClientCreationError: + return ["(Errore creazione client API)", DEFAULT_MODEL] + try: + models_response: Any = client.models.list() + models: Any = getattr(models_response, "data", models_response) + filtered_models: List[str] = sorted( + [ + model.id + for model in models + if not any( + term in model.id.lower() for term in ["embed", "embedding"] + ) + and ( + any( + term in model.id.lower() + for term in ["chat", "instruct", "gpt", "claude", "grok"] + ) + or len(model.id.split("-")) > 2 + ) + ] + ) + if not filtered_models: + filtered_models = sorted( + [ + model.id + for model in models + if not any( + term in model.id.lower() for term in ["embed", "embedding"] + ) + ] + ) + return filtered_models if filtered_models else [DEFAULT_MODEL] + except Exception: + return ["(Errore recupero modelli)", 
DEFAULT_MODEL]
+    return [DEFAULT_MODEL]
+
+
+__all__ = [
+    "DEFAULT_MODEL",
+    "DEFAULT_ENDPOINT",
+    "ClientCreationError",
+    "get_openai_client",
+    "get_available_models_for_endpoint",
+]
diff --git a/utils/startup_utils.py b/utils/startup_utils.py
new file mode 100644
index 0000000..bbf77e7
--- /dev/null
+++ b/utils/startup_utils.py
@@ -0,0 +1,52 @@
+import logging
+import os
+
+from pathlib import Path
+from typing import TypedDict
+
+from models.database import DatabaseEngine
+from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT
+
+logger = logging.getLogger(__name__)
+
+
+def setup_logging(level: int = logging.INFO, log_file: str | Path | None = None) -> None:
+    """Configure the root logger with a basic format.
+
+    If ``log_file`` is provided, logs are also written to that file.
+    """
+    # ``logging.basicConfig`` does not accept a typed dict passed via ``**``
+    # in a mypy-safe way, so the arguments are passed explicitly to avoid
+    # typing issues.
+    filename = str(log_file) if log_file is not None else None
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        filename=filename,
+    )
+
+
+def initialize_database() -> None:
+    """Initialize the application database via ``DatabaseEngine.instance().init_db()``."""
+    DatabaseEngine.instance().init_db()
+
+
+class DefaultConfig(TypedDict):
+    """Default configuration for the OpenAI client."""
+
+    # api_key: read from the environment; empty string means "not configured"
+    api_key: str
+    endpoint: str
+    model: str
+    temperature: float
+    max_tokens: int
+
+
+def load_default_config() -> DefaultConfig:
+    """Return the default API configuration.
+
+    The API key is read from the ``OPENAI_API_KEY`` environment variable
+    (empty string if unset); the remaining fields use module defaults.
+    """
+    return {
+        "api_key": os.environ.get("OPENAI_API_KEY", ""),
+        "endpoint": DEFAULT_ENDPOINT,
+        "model": DEFAULT_MODEL,
+        "temperature": 0.0,
+        "max_tokens": 1000,
+    }
diff --git a/views/__init__.py b/views/__init__.py
new file mode 100644
index 0000000..cb47f14
--- /dev/null
+++ b/views/__init__.py
@@ -0,0 +1,23 @@
+"""Views package."""
+
+import logging
+from typing import Callable, Dict
+
+logger = logging.getLogger(__name__)
+
+
+# Global registry mapping a page name to its render callable.
+page_registry: Dict[str, Callable] = {}
+
+
+def register_page(name: str) -> Callable[[Callable], Callable]:
+    """Decorator that registers a page's render function under ``name``.
+
+    Raises ``ValueError`` (after logging a warning) if ``name`` is already
+    registered.
+    """
+
+    def decorator(func: Callable) -> Callable:
+        if name in page_registry:
+            messaggio = f"La pagina '{name}' è già registrata"
+            logger.warning(messaggio)
+            raise ValueError(messaggio)
+        page_registry[name] = func
+        return func
+
+    return decorator
diff --git a/views/__pycache__/__init__.cpython-311.pyc b/views/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..0004b71
Binary files /dev/null and b/views/__pycache__/__init__.cpython-311.pyc differ
diff --git a/views/__pycache__/api_configurazione.cpython-311.pyc b/views/__pycache__/api_configurazione.cpython-311.pyc
new file mode 100644
index 0000000..6956b06
Binary files /dev/null and b/views/__pycache__/api_configurazione.cpython-311.pyc differ
diff --git a/views/__pycache__/esecuzione_test.cpython-311.pyc
b/views/__pycache__/esecuzione_test.cpython-311.pyc new file mode 100644 index 0000000..6b369f3 Binary files /dev/null and b/views/__pycache__/esecuzione_test.cpython-311.pyc differ diff --git a/views/__pycache__/gestione_domande.cpython-311.pyc b/views/__pycache__/gestione_domande.cpython-311.pyc new file mode 100644 index 0000000..276ba1d Binary files /dev/null and b/views/__pycache__/gestione_domande.cpython-311.pyc differ diff --git a/views/__pycache__/gestione_set.cpython-311.pyc b/views/__pycache__/gestione_set.cpython-311.pyc new file mode 100644 index 0000000..604e95d Binary files /dev/null and b/views/__pycache__/gestione_set.cpython-311.pyc differ diff --git a/views/__pycache__/home.cpython-311.pyc b/views/__pycache__/home.cpython-311.pyc new file mode 100644 index 0000000..09f1213 Binary files /dev/null and b/views/__pycache__/home.cpython-311.pyc differ diff --git a/views/__pycache__/session_state.cpython-311.pyc b/views/__pycache__/session_state.cpython-311.pyc new file mode 100644 index 0000000..9824153 Binary files /dev/null and b/views/__pycache__/session_state.cpython-311.pyc differ diff --git a/views/__pycache__/set_helpers.cpython-311.pyc b/views/__pycache__/set_helpers.cpython-311.pyc new file mode 100644 index 0000000..3a1780f Binary files /dev/null and b/views/__pycache__/set_helpers.cpython-311.pyc differ diff --git a/views/__pycache__/state_models.cpython-311.pyc b/views/__pycache__/state_models.cpython-311.pyc new file mode 100644 index 0000000..1cd9be5 Binary files /dev/null and b/views/__pycache__/state_models.cpython-311.pyc differ diff --git a/views/__pycache__/style_utils.cpython-311.pyc b/views/__pycache__/style_utils.cpython-311.pyc new file mode 100644 index 0000000..f44c198 Binary files /dev/null and b/views/__pycache__/style_utils.cpython-311.pyc differ diff --git a/views/__pycache__/visualizza_risultati.cpython-311.pyc b/views/__pycache__/visualizza_risultati.cpython-311.pyc new file mode 100644 index 0000000..a65a827 Binary 
files /dev/null and b/views/__pycache__/visualizza_risultati.cpython-311.pyc differ diff --git a/views/api_configurazione.py b/views/api_configurazione.py new file mode 100644 index 0000000..17c9012 --- /dev/null +++ b/views/api_configurazione.py @@ -0,0 +1,291 @@ +import logging +import streamlit as st + +# from views import register_page +from views.style_utils import add_page_header, add_section_title +from controllers import ( + save_preset, + delete_preset, + load_presets, + list_presets, + get_preset_by_id, + validate_preset, + test_api_connection, +) +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + +logger = logging.getLogger(__name__) + + +# Funzioni di callback per i pulsanti del form +def start_new_preset_edit(): + st.session_state.editing_preset = True + st.session_state.current_preset_edit_id = None # Indica nuovo preset + st.session_state.preset_form_data = { + "name": "", + "endpoint": DEFAULT_ENDPOINT, + "api_key": "", + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000 + } + + +def start_existing_preset_edit(preset_id: str) -> None: + preset_to_edit = get_preset_by_id(preset_id, st.session_state.api_presets) + if not preset_to_edit: + st.error("Preset non trovato.") + return + st.session_state.editing_preset = True + st.session_state.current_preset_edit_id = preset_id + st.session_state.preset_form_data = preset_to_edit.copy() + # Assicura che i campi numerici siano del tipo corretto per gli slider/number_input + st.session_state.preset_form_data["temperature"] = float( + st.session_state.preset_form_data.get("temperature", 0.0) + ) + st.session_state.preset_form_data["max_tokens"] = int( + st.session_state.preset_form_data.get("max_tokens", 1000) + ) + if "endpoint" not in st.session_state.preset_form_data: + st.session_state.preset_form_data["endpoint"] = DEFAULT_ENDPOINT + + +def cancel_preset_edit(): + st.session_state.editing_preset = False + st.session_state.current_preset_edit_id = None + 
st.session_state.preset_form_data = {} + + +def save_preset_from_form(): + """Salva un preset leggendo i valori direttamente dagli input della form.""" + # Recupera sempre i valori correnti dei widget dal session_state + preset_name = st.session_state.get("preset_name", "").strip() + endpoint = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + api_key = st.session_state.get("preset_api_key", "") + model = st.session_state.get("preset_model", DEFAULT_MODEL) + temperature = float( + st.session_state.get( + "preset_temperature", + st.session_state.preset_form_data.get("temperature", 0.0), + ) + ) + max_tokens = int( + st.session_state.get( + "preset_max_tokens", + st.session_state.preset_form_data.get("max_tokens", 1000), + ) + ) + + # Aggiorna il dizionario del form in sessione con i valori raccolti + st.session_state.preset_form_data.update( + { + "name": preset_name, + "endpoint": endpoint, + "api_key": api_key, + "model": model, + "temperature": temperature, + "max_tokens": max_tokens, + } + ) + + form_data = st.session_state.preset_form_data.copy() + current_id = st.session_state.current_preset_edit_id + + is_valid, validation_message = validate_preset(form_data, current_id) + if not is_valid: + st.error(validation_message) + return + + success, message, updated_df = save_preset(form_data, current_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + cancel_preset_edit() # Chiudi il form + else: + st.error(message) + + +def delete_preset_callback(preset_id): + success, message, updated_df = delete_preset(preset_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + if st.session_state.current_preset_edit_id == preset_id: + cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form + else: + st.error(message) + + +# @register_page("Configurazione API") +def render(): + add_page_header( + "Gestione Preset API", + icon="⚙️", + description="Crea, visualizza, testa ed 
elimina i preset di configurazione API per LLM." + ) + + # Stato della sessione per la gestione del form di creazione/modifica preset + if "editing_preset" not in st.session_state: + st.session_state.editing_preset = False + if "current_preset_edit_id" not in st.session_state: + st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica + if "preset_form_data" not in st.session_state: + st.session_state.preset_form_data = {} + + # Carica i preset API utilizzando la cache + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + + # Sezione per visualizzare/modificare i preset + if st.session_state.editing_preset: + add_section_title("Modifica/Crea Preset API", icon="✏️") + form_data = st.session_state.preset_form_data + + with st.form(key="preset_form"): + # Usa un key specifico per il campo nome e aggiorna il form_data + form_data["name"] = st.text_input( + "Nome del Preset", + value=form_data.get("name", ""), + key="preset_name", # Key esplicita per il campo nome + help="Un nome univoco per questo preset." + ) + + # Campo chiave API con key esplicita + form_data["api_key"] = st.text_input( + "Chiave API", + value=form_data.get("api_key", ""), + type="password", + key="preset_api_key", # Key esplicita per la chiave API + help="La tua chiave API per il provider selezionato." 
+ ) + + # Campo endpoint con key esplicita + form_data["endpoint"] = st.text_input( + "Provider Endpoint", + value=form_data.get("endpoint", DEFAULT_ENDPOINT), + placeholder="https://api.openai.com/v1", + key="preset_endpoint", # Key esplicita per l'endpoint + help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" + ) + + # Modello sempre personalizzabile + form_data["model"] = st.text_input( + "Modello", + value=form_data.get("model", DEFAULT_MODEL), + placeholder="gpt-4o", + key="preset_model", # Key esplicita per il modello + help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" + ) + + form_data["temperature"] = st.slider( + "Temperatura", + 0.0, + 2.0, + float(form_data.get("temperature", 0.0)), + 0.1, + key="preset_temperature", + ) + form_data["max_tokens"] = st.number_input( + "Max Tokens", + min_value=50, + max_value=8000, + value=int(form_data.get("max_tokens", 1000)), + step=50, + key="preset_max_tokens", + ) + + # Campo Test Connessione e pulsanti di salvataggio/annullamento + # Pulsante Test Connessione + if st.form_submit_button("⚡ Testa Connessione API"): + # Usa direttamente i valori dal session_state per il test + api_key_to_test = st.session_state.get("preset_api_key", "") + endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) + + with st.spinner("Test in corso..."): + success, message = test_api_connection( + api_key=api_key_to_test, + endpoint=endpoint_to_test, + model=model_to_test, + temperature=form_data.get("temperature", 0.0), + max_tokens=form_data.get("max_tokens", 1000) + ) + if success: + st.success(message) + else: + st.error(message) + + # Pulsanti Salva e Annulla + cols_form_buttons = st.columns(2) + with cols_form_buttons[0]: + if st.form_submit_button( + "💾 Salva Preset", + on_click=save_preset_from_form, + type="primary", + use_container_width=True, + ): + pass # Il callback gestisce il salvataggio 
+ with cols_form_buttons[1]: + if st.form_submit_button( + "❌ Annulla", + on_click=cancel_preset_edit, + use_container_width=True, + ): + pass # Il callback gestisce il cambio di stato + else: + add_section_title("Preset API Salvati", icon="🗂️") + if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): + pass # Il callback gestisce il cambio di stato + + preset_list = list_presets(st.session_state.api_presets) + if not preset_list: + st.info( + "Nessun preset API salvato. Clicca su 'Crea Nuovo Preset' per iniziare." + ) + else: + for preset in preset_list: + with st.container(): + st.markdown(f"#### {preset['name']}") + cols_preset_details = st.columns([3, 1, 1]) + with cols_preset_details[0]: + st.caption(f"Modello: {preset.get('model', 'N/A')}") + st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") + with cols_preset_details[1]: + if st.button( + "✏️ Modifica", + key=f"edit_{preset['id']}", + on_click=start_existing_preset_edit, + args=(preset['id'],), + use_container_width=True, + ): + pass + with cols_preset_details[2]: + if st.button( + "🗑️ Elimina", + key=f"delete_{preset['id']}", + on_click=delete_preset_callback, + args=(preset['id'],), + type="secondary", + use_container_width=True, + ): + pass + st.divider() + + # Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) + if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui + st.success(st.session_state.preset_applied_message) + del st.session_state.preset_applied_message + + if "preset_saved_message" in st.session_state: + st.success(st.session_state.preset_saved_message) + del st.session_state.preset_saved_message + + if "preset_deleted_message" in st.session_state: + st.success(st.session_state.preset_deleted_message) + del st.session_state.preset_deleted_message + + +if __name__ == "__main__": + render() +else: + render() diff --git a/views/component_utils.py b/views/component_utils.py 
new file mode 100644 index 0000000..aa942d3 --- /dev/null +++ b/views/component_utils.py @@ -0,0 +1,180 @@ +import logging +from typing import Any + +import streamlit as st +logger = logging.getLogger(__name__) + + +def create_card(title: str, content: str, icon: str | None = None, + is_success: bool = False, is_warning: bool = False, is_error: bool = False): + """Crea una scheda stilizzata con un contenuto personalizzabile.""" + color = "#4F6AF0" + bg_color = "white" + shadow_color = "rgba(79, 106, 240, 0.15)" + + if is_success: + color = "#28a745" + bg_color = "#f8fff9" + shadow_color = "rgba(40, 167, 69, 0.15)" + elif is_warning: + color = "#ffc107" + bg_color = "#fffef8" + shadow_color = "rgba(255, 193, 7, 0.15)" + elif is_error: + color = "#dc3545" + bg_color = "#fff8f8" + shadow_color = "rgba(220, 53, 69, 0.15)" + + icon_text = f'{icon}' if icon else "" + + st.markdown( + f""" + + +
+
{icon_text}{title}
+
{content}
+
+ """, + unsafe_allow_html=True, + ) + + +def create_metrics_container(metrics_data: list[dict[str, Any]]) -> None: + """Crea un contenitore con metriche ben stilizzate.""" + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + metrics_html = '
' + for metric in metrics_data: + icon_html = ( + f'
{metric.get("icon", "")}
' + if metric.get("icon") + else "" + ) + unit = metric.get("unit", "") + unit_html = f'{unit}' if unit else "" + help_text = f'title="{metric.get("help")}"' if metric.get("help") else "" + + metrics_html += f""" +
+ {icon_html} +
{metric['value']}{unit_html}
+
{metric['label']}
+
+ """ + + metrics_html += '
' + st.markdown(metrics_html, unsafe_allow_html=True) diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py new file mode 100644 index 0000000..628c1ad --- /dev/null +++ b/views/esecuzione_test.py @@ -0,0 +1,173 @@ +import logging + +import streamlit as st + +from controllers import run_test, load_sets, load_presets, get_preset_by_name +# from views import register_page +from views.style_utils import add_page_header, add_section_title +logger = logging.getLogger(__name__) + + +# === FUNZIONI DI CALLBACK === + +def set_llm_mode_callback(): + """Funzione di callback: imposta la modalità LLM""" + if st.session_state.test_mode != "Valutazione Automatica con LLM": + st.session_state.test_mode = "Valutazione Automatica con LLM" + st.session_state.mode_changed = True + + +def run_llm_test_callback(): + """Funzione di callback: esegue il test LLM""" + st.session_state.run_llm_test = True + + +# @register_page("Esecuzione Test") +def render(): + # === Inizializzazione delle variabili di stato === + if 'test_mode' not in st.session_state: + st.session_state.test_mode = "Valutazione Automatica con LLM" + if 'mode_changed' not in st.session_state: + st.session_state.mode_changed = False + if 'run_llm_test' not in st.session_state: + st.session_state.run_llm_test = False + + # Gestisce il cambio di modalità + if st.session_state.mode_changed: + st.session_state.mode_changed = False + st.rerun() + + add_page_header( + "Esecuzione Test", + icon="🧪", + description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." + ) + + # Carica i dati necessari, utilizzando cache e session state + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + if st.session_state.api_presets.empty: + st.error( + "Nessun preset API configurato. 
Vai alla pagina 'Gestione Preset API' " + "per crearne almeno uno prima di eseguire i test." + ) + st.stop() + + # Controlla se ci sono set di domande disponibili + if st.session_state.question_sets.empty: + st.warning("Nessun set di domande disponibile. Crea dei set di domande prima di eseguire i test.") + st.stop() + + # Seleziona set di domande per il test + add_section_title("Seleziona Set di Domande", icon="📚") + set_options = {} + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + for _, row in st.session_state.question_sets.iterrows(): + if 'questions' in row and row['questions']: + set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" + + if not set_options: + st.warning("Nessun set di domande con domande associate. Creane uno in 'Gestione Set di Domande'.") + st.stop() + + selected_set_id = st.selectbox( + "Seleziona un set di domande", + options=list(set_options.keys()), + format_func=lambda x: set_options[x], + key="select_question_set_for_test" + ) + + selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] + questions_in_set = selected_set['questions'] + + # --- Opzioni API basate su Preset --- + add_section_title("Opzioni API basate su Preset", icon="🛠️") + + preset_display_names = list(st.session_state.api_presets["name"]) + + # Seleziona preset per generazione risposta (comune a entrambe le modalità) + generation_preset_name = st.selectbox( + "Seleziona Preset per Generazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="generation_preset_select", + help="Il preset API utilizzato per generare la risposta alla domanda." 
+ ) + st.session_state.selected_generation_preset_name = generation_preset_name + + # Seleziona preset per valutazione (solo per modalità LLM) + if st.session_state.test_mode == "Valutazione Automatica con LLM": + evaluation_preset_name = st.selectbox( + "Seleziona Preset per Valutazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="evaluation_preset_select", + help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." + ) + st.session_state.selected_evaluation_preset_name = evaluation_preset_name + + # --- Logica di Esecuzione Test --- + test_mode_selected = st.session_state.test_mode + + if test_mode_selected == "Valutazione Automatica con LLM": + st.header("Esecuzione: Valutazione Automatica con LLM") + + # Pulsante che utilizza la funzione di callback + st.button( + "🚀 Esegui Test con LLM", + key="run_llm_test_btn", + on_click=run_llm_test_callback + ) + + # Gestisce l'esecuzione del test + if st.session_state.run_llm_test: + st.session_state.run_llm_test = False # Resetta lo stato + + gen_preset_config = get_preset_by_name( + st.session_state.selected_generation_preset_name, + st.session_state.api_presets, + ) + eval_preset_config = get_preset_by_name( + st.session_state.selected_evaluation_preset_name, + st.session_state.api_presets, + ) + + if not gen_preset_config or not eval_preset_config: + st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") + else: + with st.spinner("Generazione risposte e valutazione LLM in corso..."): + exec_result = run_test( + selected_set_id, + selected_set['name'], + questions_in_set, + gen_preset_config, + eval_preset_config, + ) + + if exec_result: + st.session_state.results = exec_result['results_df'] + st.success(f"Test LLM completato! 
Punteggio medio: {exec_result['avg_score']:.2f}%") + + # Visualizzazione risultati dettagliati + st.subheader("Risultati Dettagliati") + for q_id, result in exec_result['results'].items(): + with st.expander( + f"Domanda: {result['question'][:50]}..." + ): + col1, col2 = st.columns(2) + with col1: + st.write("**Domanda:**", result['question']) + st.write("**Risposta Attesa:**", result['expected_answer']) + with col2: + st.write("**Risposta Generata:**", result['actual_answer']) + st.write("**Punteggio:**", f"{result['evaluation']['score']:.1f}%") + st.write("**Valutazione:**", result['evaluation']['explanation']) + + +if __name__ == "__main__": + render() +else: + render() diff --git a/views/gestione_domande.py b/views/gestione_domande.py new file mode 100644 index 0000000..f4e4ac9 --- /dev/null +++ b/views/gestione_domande.py @@ -0,0 +1,311 @@ +import logging + +import streamlit as st +import pandas as pd + +from controllers import ( + add_question, + get_filtered_questions, + load_questions, + save_question_action, + delete_question_action, + import_questions_action, +) +# from views import register_page +from views.style_utils import add_page_header +from views.state_models import QuestionPageState +logger = logging.getLogger(__name__) + + +# === FUNZIONI DI CALLBACK === + + +def create_save_question_callback( + question_id, edited_question, edited_answer, edited_category +): + def callback(): + state = QuestionPageState() + try: + result = save_question_action( + question_id, edited_question, edited_answer, edited_category + ) + if result["success"]: + state.save_success = True + state.save_success_message = "Domanda salvata." + st.session_state.questions = result["questions_df"] + state.trigger_rerun = True + else: + state.save_error = True + state.save_error_message = "Domanda non salvata." 
+ except Exception as e: + state.save_error = True + state.save_error_message = f"Domanda non salvata: {e}" + st.session_state.question_page_state = state + + return callback + + +def import_questions_callback(): + uploaded_file = st.session_state.get("uploaded_file_content") + state = QuestionPageState() + try: + result = import_questions_action(uploaded_file) + st.session_state.questions = result["questions_df"] + count = result.get("imported_count", 0) + warnings = result.get("warnings", []) + + if count > 0: + state.import_success = True + msg = f"Importate con successo {count} domande." + if warnings: + msg = f"{msg} Avvisi: {'; '.join(warnings)}" + state.import_success_message = msg + else: + state.import_error = True + msg = "Nessuna domanda importata." + if warnings: + msg = f"{msg} {'; '.join(warnings)}" + state.import_error_message = msg + + state.trigger_rerun = True + except Exception as e: + state.import_error = True + state.import_error_message = str(e) + st.session_state.question_page_state = state + st.session_state.pop("upload_questions_file", None) + st.session_state.uploaded_file_content = None + + +# === FUNZIONI DI DIALOGO === + +@st.dialog("Conferma Eliminazione") +def confirm_delete_question_dialog(question_id, question_text): + """Dialogo di conferma per l'eliminazione della domanda""" + st.write("Sei sicuro di voler eliminare questa domanda?") + st.write(f"**Domanda:** {question_text[:100]}...") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + state = QuestionPageState() + try: + questions = delete_question_action(question_id) + state.delete_success = True + st.session_state.questions = questions + state.trigger_rerun = True + except Exception as e: + state.save_error = True + state.save_error_message = str(e) + st.session_state.question_page_state = state + st.rerun() + + with col2: + if st.button("No, Annulla", 
use_container_width=True): + st.rerun() + + +# @register_page("Gestione Domande") +def render(): + # === Inizializzazione dello stato === + st.session_state.setdefault("question_page_state", QuestionPageState()) + state: QuestionPageState = st.session_state.question_page_state + + # Carica le domande utilizzando la cache solo se non già presenti + if "questions" not in st.session_state: + st.session_state.questions = load_questions() + + # Gestisce la logica di rerun + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + # Mostra i messaggi di stato + if state.save_success: + st.success(state.save_success_message) + if state.save_error: + st.error(state.save_error_message) + if state.delete_success: + st.success(state.delete_success_message) + if state.add_success: + st.success(state.add_success_message) + if state.import_success: + st.success(state.import_success_message) + if state.import_error: + st.error(state.import_error_message) + + # Resetta lo stato dopo la visualizzazione dei messaggi + st.session_state.question_page_state = QuestionPageState() + + # Aggiungi un'intestazione stilizzata + add_page_header( + "Gestione Domande", + icon="📋", + description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." 
+ ) + + # Scheda per diverse funzioni di gestione delle domande + tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) + + # Scheda Visualizza e Modifica Domande + with tabs[0]: + st.header("Visualizza e Modifica Domande") + + if 'questions' in st.session_state and not st.session_state.questions.empty: + _, categories = get_filtered_questions() + category_options = ["Tutte le categorie"] + categories + + selected_category = st.selectbox( + "Filtra per categoria:", + options=category_options, + index=0 + ) + + filter_cat = None if selected_category == "Tutte le categorie" else selected_category + filtered_questions_df, _ = get_filtered_questions(filter_cat) + + if not filtered_questions_df.empty: + for idx, row in filtered_questions_df.iterrows(): + category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' + with st.expander( + f"{row['domanda'][:100]}... (Categoria: {category_display})" + ): + col1, col2 = st.columns([3, 1]) + + with col1: + edited_question = st.text_area( + f"Modifica Domanda {idx + 1}", + value=row['domanda'], + key=f"q_edit_{row['id']}" + ) + + edited_answer = st.text_area( + f"Modifica Risposta Attesa {idx + 1}", + value=row['risposta_attesa'], + key=f"a_edit_{row['id']}" + ) + + edited_category_value = row.get('categoria', '') + edited_category = st.text_input( + f"Modifica Categoria {idx + 1}", + value=edited_category_value, + key=f"c_edit_{row['id']}" + ) + + with col2: + st.button( + "Salva Modifiche", + key=f"save_{row['id']}", + on_click=create_save_question_callback( + row['id'], edited_question, edited_answer, edited_category + ), + ) + + if st.button( + "Elimina Domanda", + key=f"delete_{row['id']}", + type="secondary" + ): + confirm_delete_question_dialog(row['id'], row['domanda']) + else: + st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") + + else: + st.info("Nessuna domanda disponibile. 
Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") + + # Scheda Aggiungi Domande + with tabs[1]: + st.header("Aggiungi Nuova Domanda") + + with st.form("add_question_form"): + domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") + risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") + categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") + + submitted = st.form_submit_button("Aggiungi Domanda") + + if submitted: + if domanda and risposta_attesa: + # Passa la categoria, che può essere una stringa vuota se non inserita + question_id = add_question( + domanda=domanda, + risposta_attesa=risposta_attesa, + categoria=categoria, + ) + state = QuestionPageState() + state.add_success = True + state.add_success_message = ( + f"Domanda aggiunta con successo con ID: {question_id}" + ) + state.trigger_rerun = True + st.session_state.question_page_state = state + st.session_state.questions = load_questions() + st.rerun() + else: + st.error("Sono necessarie sia la domanda che la risposta attesa.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Domande da File") + + st.write(""" + Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). + + ### Formato File: + - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). 
+ + ### Esempio CSV: + ```csv + domanda,risposta_attesa,categoria + "Quanto fa 2+2?","4","Matematica Base" + "Qual è la capitale della Francia?","Parigi","Geografia" + "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" + ``` + + ### Esempio JSON: + ```json + [ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } + ] + ``` + """) + + uploaded_file = st.file_uploader( + "Scegli un file", type=["csv", "json"], key="upload_questions_file" + ) + + if uploaded_file is not None: + # Salva il file in session_state per l'uso da parte della callback + st.session_state.uploaded_file_content = uploaded_file + + # Pulsante che utilizza la funzione di callback + st.button( + "Importa Domande", + key="import_questions_btn", + on_click=import_questions_callback + ) + + +if __name__ == "__main__": + render() +else: + render() diff --git a/views/gestione_set.py b/views/gestione_set.py new file mode 100644 index 0000000..7fde132 --- /dev/null +++ b/views/gestione_set.py @@ -0,0 +1,358 @@ +import logging +import streamlit as st +from controllers import ( + create_set, + load_sets, + prepare_sets_for_view, +) +# from views import register_page +from views.style_utils import add_page_header, add_global_styles +from views.state_models import SetPageState +from views.set_helpers import ( + confirm_delete_set_dialog, + import_set_callback, + mark_expander_open, + create_save_set_callback, +) + +logger = logging.getLogger(__name__) + + +# @register_page("Gestione Set di Domande") +def render(): + add_global_styles() + + st.session_state.setdefault("set_page_state", SetPageState()) + state: SetPageState = st.session_state.set_page_state + + st.session_state.setdefault("question_checkboxes", {}) + 
st.session_state.setdefault("newly_selected_questions", {}) + st.session_state.setdefault("set_expanders", {}) + + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + if state.save_set_success: + st.success(state.save_set_success_message) + state.save_set_success = False + + if state.save_set_error: + st.error(state.save_set_error_message) + state.save_set_error = False + + if state.delete_set_success: + st.success(state.delete_set_success_message) + state.delete_set_success = False + + if state.create_set_success: + st.success(state.create_set_success_message) + state.create_set_success = False + + if state.import_set_success: + st.success(state.import_set_success_message) + state.import_set_success = False + + if state.import_set_error: + st.error(state.import_set_error_message) + state.import_set_error = False + + # Inizializza i dati tramite il controller + initial_data = prepare_sets_for_view() + st.session_state.questions = initial_data["questions_df"] + st.session_state.question_sets = initial_data["raw_sets_df"] + + # Assicurati che esista lo stato degli expander per ogni set + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + current_set_ids = st.session_state.question_sets['id'].tolist() + # Rimuovi stati per set non più presenti + for sid in list(st.session_state.set_expanders.keys()): + if sid not in current_set_ids: + del st.session_state.set_expanders[sid] + # Aggiungi stato predefinito per nuovi set + for sid in current_set_ids: + st.session_state.set_expanders.setdefault(sid, False) + + # Aggiungi un'intestazione stilizzata + add_page_header( + "Gestione Set di Domande", + icon="📚", + description="Organizza le tue domande in set per test e valutazioni" + ) + + # Schede per diverse funzioni di gestione dei set + tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) + + # Scheda Visualizza e Modifica Set + with tabs[0]: + st.header("Visualizza e Modifica Set di 
Domande") + + categories = initial_data["categories"] + selected_categories = st.multiselect( + "Filtra per categorie (mostra i set che contengono almeno una domanda da OGNI categoria selezionata):", + options=categories, + default=[], + key="filter_categories", + ) + + data = prepare_sets_for_view(selected_categories) + questions_df = data["questions_df"] + display_sets_df = data["sets_df"] + st.session_state.questions = questions_df + + questions_ready = ( + not questions_df.empty + and 'domanda' in questions_df.columns + and 'categoria' in questions_df.columns + ) + + if display_sets_df.empty: + if selected_categories: + st.info( + "Nessun set trovato che contenga domande da tutte le categorie selezionate: " + f"{', '.join(selected_categories)}." + ) + else: + st.info( + "Nessun set di domande disponibile. Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'." + ) + else: + for idx, row in display_sets_df.iterrows(): + exp_key = f"set_expander_{row['id']}" + if exp_key not in st.session_state.set_expanders: + st.session_state.set_expanders[exp_key] = False + + with st.expander( + f"{row['name']}", + expanded=st.session_state.set_expanders.get(exp_key, False), + ): + col1, col2 = st.columns([3, 1]) + + with col1: + _ = st.text_input( + "Nome Set", + value=row['name'], + key=f"set_name_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + + st.subheader("Domande in questo Set") + current_question_ids_in_set = row.get('questions', []) + if not isinstance(current_question_ids_in_set, list): + current_question_ids_in_set = [] + + if row['id'] not in st.session_state.question_checkboxes: + st.session_state.question_checkboxes[row['id']] = {} + + questions_detail = row.get('questions_detail', []) + if questions_detail: + for q in questions_detail: + q_id = str(q.get('id')) + q_text = q.get('domanda', f"ID Domanda: {q_id} (non trovata)") + q_cat = q.get('categoria', 'N/A') + display_text = f"{q_text} (Categoria: {q_cat})" + + checkbox_value = 
st.checkbox( + display_text, + value=True, + key=f"qcheck_{row['id']}_{q_id}", + on_change=mark_expander_open, + args=(exp_key,), + ) + st.session_state.question_checkboxes[row['id']][q_id] = checkbox_value + else: + st.info("Nessuna domanda in questo set.") + st.subheader("Aggiungi Domande al Set") + + # 初始化新选择的问题状态 + if row['id'] not in st.session_state.newly_selected_questions: + st.session_state.newly_selected_questions[row['id']] = [] + + if questions_ready: + all_questions_df = st.session_state.questions + available_questions_df = all_questions_df[ + ~all_questions_df['id'].astype(str).isin( + [str(q_id) for q_id in current_question_ids_in_set] + ) + ] + + if not available_questions_df.empty: + question_dict_for_multiselect = { + q_id: f"{q_text} (Cat: {q_cat})" + for q_id, q_text, q_cat in zip( + available_questions_df['id'].astype(str), + available_questions_df['domanda'], + available_questions_df['categoria'], + ) + } + newly_selected_questions_ids = st.multiselect( + "Seleziona domande da aggiungere", + options=list(question_dict_for_multiselect.keys()), + format_func=lambda x: question_dict_for_multiselect.get(x, x), + key=f"add_q_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids + else: + st.info("Nessuna altra domanda disponibile da aggiungere.") + else: + st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") + + with col2: + st.button( + "Salva Modifiche", + key=f"save_set_{row['id']}", + on_click=create_save_set_callback(row['id'], exp_key, state) + ) + + # Pulsante Elimina con dialog di conferma + if st.button( + "Elimina Set", + key=f"delete_set_{row['id']}", + type="secondary" + ): + mark_expander_open(exp_key) + confirm_delete_set_dialog(row['id'], row['name'], state) + + # Lo stato dell'expander viene aggiornato tramite i callback + + # Scheda Crea Nuovo Set + with tabs[1]: + st.header("Crea Nuovo Set di 
Domande") + + with st.form("create_set_form"): + set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") + + selected_qs_for_new_set = [] + questions_ready_for_creation = ( + 'questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns + ) + + if questions_ready_for_creation: + all_questions_df_creation = st.session_state.questions + question_dict_for_creation = { + q_id: f"{q_text} (Cat: {q_cat})" + for q_id, q_text, q_cat in zip( + all_questions_df_creation['id'].astype(str), + all_questions_df_creation['domanda'], + all_questions_df_creation['categoria'], + ) + } + + selected_qs_for_new_set = st.multiselect( + "Seleziona domande per questo set", + options=list(question_dict_for_creation.keys()), + format_func=lambda x: question_dict_for_creation.get(x, x), + key="create_set_questions", + ) + else: + st.info( + "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). \n" + "Vai a 'Gestione Domande' per aggiungere/caricare domande." + ) + + submitted = st.form_submit_button("Crea Set") + + if submitted: + if set_name: + set_id = create_set( + set_name, [str(q_id) for q_id in selected_qs_for_new_set] + ) + st.session_state.question_sets = load_sets() + state.create_set_success_message = ( + f"Set di domande creato con successo con ID: {set_id}" + ) + state.create_set_success = True + state.trigger_rerun = True + st.rerun() + else: + st.error("Il nome del set è obbligatorio.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Set da File") + + st.write(""" + Carica un file JSON o CSV contenente uno o più set di domande. 
+ + ### Formato File JSON per Set Multipli: + ```json + [ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } + ] + ``` + + ### Formato CSV: + Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` + (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e + ``categoria``. + ```csv + name,id,domanda,risposta_attesa,categoria + Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia + Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia + Matematica Base,3,Quanto fa 2+2?,4,Matematica + Matematica Base,4,Quanto fa 10*4?,40,Matematica + ``` + + ### Note Importanti: + - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente + - Se un set con lo stesso nome esiste già, verrà saltato + - Solo le domande nuove verranno aggiunte al database + - Le domande esistenti verranno referenziate nei nuovi set + """) + + uploaded_file = st.file_uploader( + "Scegli un file", type=["json", "csv"], key="upload_set_file" + ) + + if uploaded_file is not None: + st.session_state.uploaded_file_content_set = uploaded_file + st.button( + "Importa Set", + key="import_set_btn", + on_click=lambda: import_set_callback(state) + ) + + +if __name__ == "__main__": + render() +else: + render() diff --git a/views/home.py b/views/home.py new file mode 100644 index 0000000..ec79396 --- /dev/null +++ b/views/home.py @@ -0,0 +1,108 @@ +"""Modulo della vista per la pagina Home dell'applicazione Streamlit.""" + +import logging + 
+import streamlit as st +from views.style_utils import add_home_styles +# from views import register_page + +logger = logging.getLogger(__name__) + + +# @register_page("Home") +def render(): + """Visualizza la pagina principale con le funzionalità della piattaforma.""" + + add_home_styles() + + st.markdown( + """ +
+<div class="welcome-section">
+    <div class="welcome-title">🧠 Piattaforma di Valutazione LLM</div>
+    <div class="subtitle">Una piattaforma completa per valutare le risposte LLM con diversi provider AI</div>
+</div>
+""", + unsafe_allow_html=True, + ) + + # Box delle funzionalità con icone e stile migliorato + col1, col2 = st.columns(2) + + with col1: + st.markdown( + """ +
+<div class="feature-box">
+    <div class="feature-title">
+        <span class="icon-large">📋</span>
+        Gestione delle Domande
+    </div>
+    <div class="feature-description">
+        Crea, modifica e organizza le tue domande di test con le risposte previste.
+        Costruisci set di test completi per valutare le risposte LLM in modo efficiente.
+    </div>
+</div>
+
+<div class="feature-box">
+    <div class="feature-title">
+        <span class="icon-large">🔌</span>
+        Supporto Multi-Provider API
+    </div>
+    <div class="feature-description">
+        Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello.
+        Configura parametri API e verifica le connessioni con feedback in tempo reale.
+    </div>
+</div>
+ """, + unsafe_allow_html=True, + ) + + with col2: + st.markdown( + """ +
+<div class="feature-box">
+    <div class="feature-title">
+        <span class="icon-large">🧪</span>
+        Valutazione Automatizzata
+    </div>
+    <div class="feature-description">
+        Esegui test con punteggio automatico rispetto alle risposte previste.
+        Valuta la somiglianza semantica tra testi con modelli linguistici.
+    </div>
+</div>
+
+<div class="feature-box">
+    <div class="feature-title">
+        <span class="icon-large">📊</span>
+        Analisi Avanzata
+    </div>
+    <div class="feature-description">
+        Visualizza i risultati dei test con grafici interattivi e metriche dettagliate.
+        Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici.
+    </div>
+</div>
+ """, + unsafe_allow_html=True, + ) + + st.markdown( + """ +
+<div class="getting-started">
+    <h3>🚀 Iniziare</h3>
+    <ol>
+        <li>Configura le tue credenziali API nella pagina Configurazione API</li>
+        <li>Crea domande e risposte previste nella pagina Gestione Domande</li>
+        <li>Organizza le domande in set nella pagina Gestione Set di Domande</li>
+        <li>Esegui valutazioni nella pagina Esecuzione Test</li>
+        <li>Visualizza e analizza i risultati nella pagina Visualizzazione Risultati</li>
+    </ol>
+    <p>Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.</p>
+</div>
+""", + unsafe_allow_html=True, + ) + + +if __name__ == "__main__": + render() +else: + render() diff --git a/views/session_state.py b/views/session_state.py new file mode 100644 index 0000000..cbd6f4e --- /dev/null +++ b/views/session_state.py @@ -0,0 +1,33 @@ +import logging + +import streamlit as st + +from controllers.startup_controller import get_initial_state +logger = logging.getLogger(__name__) + + +def ensure_keys(defaults: dict) -> None: + """Garantisce la presenza delle chiavi in ``st.session_state``. + + Parametri: + defaults: Dizionario con chiavi e valori da impostare se mancanti. + """ + for key, value in defaults.items(): + st.session_state.setdefault(key, value) + + +def initialize_session_state() -> None: + """Inizializza ``st.session_state`` con i valori di default.""" + required_keys = [ + "questions", + "question_sets", + "results", + "api_key", + "endpoint", + "model", + "temperature", + "max_tokens", + ] + if any(key not in st.session_state for key in required_keys): + defaults = get_initial_state() + ensure_keys(defaults) diff --git a/views/set_helpers.py b/views/set_helpers.py new file mode 100644 index 0000000..d903236 --- /dev/null +++ b/views/set_helpers.py @@ -0,0 +1,168 @@ +import logging +from typing import IO, cast + +import streamlit as st + +from controllers import update_set, delete_set +from models.question_set import QuestionSet +from .state_models import SetPageState +logger = logging.getLogger(__name__) + + +def save_set_callback( + set_id: str, + edited_name: str, + question_options_checkboxes: dict[str, bool], + newly_selected_questions_ids: list[str], + state: SetPageState, +) -> None: + kept_questions_ids = [q_id for q_id, keep in question_options_checkboxes.items() if keep] + updated_questions_ids = list( + set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids]) + ) + + try: + result = update_set(set_id, edited_name, updated_questions_ids) + if isinstance(result, tuple): + sets_df = result[0] + 
message = result[1] if len(result) > 1 else "Set di domande aggiornato con successo!" + if len(result) > 2 and isinstance(result[2], list): + for warn in result[2]: + st.warning(warn) + else: + sets_df = result + message = "Set di domande aggiornato con successo!" + + state.save_set_success_message = message + state.save_set_success = True + if sets_df is not None: + st.session_state.question_sets = sets_df + except Exception as exc: # pragma: no cover - UI error handling + state.save_set_error = True + state.save_set_error_message = str(exc) + + state.trigger_rerun = True + + +def delete_set_callback(set_id: str, state: SetPageState): + try: + result = delete_set(set_id) + if isinstance(result, tuple): + sets_df = result[0] + message = result[1] if len(result) > 1 else "Set di domande eliminato con successo!" + if len(result) > 2 and isinstance(result[2], list): + for warn in result[2]: + st.warning(warn) + else: + sets_df = result + message = "Set di domande eliminato con successo!" + + state.delete_set_success_message = message + state.delete_set_success = True + if sets_df is not None: + st.session_state.question_sets = sets_df + except Exception as exc: # pragma: no cover - UI error handling + state.save_set_error = True + state.save_set_error_message = str(exc) + + state.trigger_rerun = True + + +@st.dialog("Conferma Eliminazione") +def confirm_delete_set_dialog(set_id: str, set_name: str, state: SetPageState): + """Dialog di conferma per l'eliminazione del set di domande""" + st.write(f"Sei sicuro di voler eliminare il set '{set_name}'?") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + delete_set_callback(set_id, state) + st.rerun() + + with col2: + if st.button("No, Annulla", use_container_width=True): + st.rerun() + + +def import_set_callback(state: SetPageState): + """Importa uno o più set di domande da file JSON o CSV.""" + + 
state.import_set_success = False + state.import_set_error = False + state.import_set_success_message = "" + state.import_set_error_message = "" + + uploaded_file = st.session_state.get("uploaded_file_content_set") + if uploaded_file is None: + raise ValueError("Nessun file caricato.") + try: + result = QuestionSet.import_from_file( + cast(IO[str] | IO[bytes], uploaded_file) + ) + + parts: list[str] = [] + if result.sets_imported_count > 0: + parts.append(f"{result.sets_imported_count} set importati") + if result.new_questions_added_count > 0: + parts.append(f"{result.new_questions_added_count} nuove domande aggiunte") + if result.existing_questions_found_count > 0: + parts.append( + f"{result.existing_questions_found_count} domande esistenti referenziate" + ) + + if parts: + message = ". ".join(parts) + "." + else: + message = "Nessun set importato." + if result.warnings: + message += " Controlla gli avvisi." + + state.import_set_success = True + state.import_set_success_message = message + + if result.questions_df is not None: + st.session_state.questions = result.questions_df + if result.sets_df is not None: + st.session_state.question_sets = result.sets_df + for warn in result.warnings: + st.warning(warn) + except Exception as exc: # pragma: no cover - UI error handling + state.import_set_error = True + state.import_set_error_message = str(exc) + + st.session_state.uploaded_file_content_set = None + st.session_state.pop("upload_set_file", None) + state.trigger_rerun = True + + +def mark_expander_open(exp_key: str): + """Segna l'expander come aperto nello stato di sessione""" + if "set_expanders" in st.session_state: + st.session_state.set_expanders[exp_key] = True + + +def create_save_set_callback(set_id: str, exp_key: str, state: SetPageState): + def callback(): + mark_expander_open(exp_key) + edited_name = st.session_state.get(f"set_name_{set_id}", "") + question_options_checkboxes = st.session_state.question_checkboxes.get(set_id, {}) + 
newly_selected_questions_ids = st.session_state.newly_selected_questions.get(set_id, []) + + save_set_callback( + set_id, + edited_name, + question_options_checkboxes, + newly_selected_questions_ids, + state, + ) + + return callback + + +def create_delete_set_callback(set_id: str, state: SetPageState): + def callback(): + delete_set_callback(set_id, state) + + return callback diff --git a/views/state_models.py b/views/state_models.py new file mode 100644 index 0000000..afec55e --- /dev/null +++ b/views/state_models.py @@ -0,0 +1,50 @@ +import logging + +from dataclasses import dataclass +logger = logging.getLogger(__name__) + + +@dataclass +class SetPageState: + """Stato UI temporaneo per la pagina di gestione dei set di domande.""" + + save_set_success: bool = False + save_set_success_message: str = "Set aggiornato con successo!" + save_set_error: bool = False + save_set_error_message: str = "Errore durante l'aggiornamento del set." + + delete_set_success: bool = False + delete_set_success_message: str = "Set eliminato con successo!" + + create_set_success: bool = False + create_set_success_message: str = "Set creato con successo!" + + import_set_success: bool = False + import_set_success_message: str = "Importazione completata con successo!" + import_set_error: bool = False + import_set_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False + + +@dataclass +class QuestionPageState: + """Stato UI temporaneo per la pagina di gestione delle domande.""" + + save_success: bool = False + save_success_message: str = "Domanda aggiornata con successo!" + save_error: bool = False + save_error_message: str = "Impossibile aggiornare la domanda." + + delete_success: bool = False + delete_success_message: str = "Domanda eliminata con successo!" + + add_success: bool = False + add_success_message: str = "Domanda aggiunta con successo!" + + import_success: bool = False + import_success_message: str = "Importazione completata con successo!" 
+ import_error: bool = False + import_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False diff --git a/views/style_utils.py b/views/style_utils.py new file mode 100644 index 0000000..de72135 --- /dev/null +++ b/views/style_utils.py @@ -0,0 +1,61 @@ +"""Funzioni di utilità per applicare stili CSS nelle viste Streamlit. + +Centralizza l'iniezione di CSS per favorirne il riuso tra le pagine. +""" + +import logging +import streamlit as st +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def load_css(): + """ + Applica il CSS globale presente in 'styles.css'. + """ + css_path = Path(__file__).parent.parent / "views" / "styles.css" + if css_path.exists(): + css_content = css_path.read_text() + st.markdown(f"", unsafe_allow_html=True) + else: + st.warning("File styles.css non trovato. Assicurati che sia presente nella cartella views.") + + +def add_global_styles(): + """Aggiunge stili globali all'applicazione.""" + load_css() + + +def add_page_header(title: str, icon: str = "💡", description: str | None = None): + """Aggiunge un'intestazione di pagina stilizzata.""" + load_css() + st.markdown( + f""" + +
+ """, + unsafe_allow_html=True, + ) + + +def add_section_title(title: str, icon: str | None = None): + """Aggiunge un titolo di sezione stilizzato.""" + icon_text = f"{icon} " if icon else "" + st.markdown( + f"
<div class='section-title'>{icon_text}{title}</div>
", + unsafe_allow_html=True, + ) + + +def add_home_styles(): + """Applica gli stili CSS specifici della home page. + + Migliora la visibilità degli input nei temi chiaro e scuro e definisce + l'aspetto degli elementi principali come box funzionali e sezioni di + benvenuto. + """ + load_css() diff --git a/views/styles.css b/views/styles.css new file mode 100644 index 0000000..5656c39 --- /dev/null +++ b/views/styles.css @@ -0,0 +1,245 @@ +/* =========================== + LAYOUT GENERALE +=========================== */ +.main .block-container { + padding-top: 2rem; + padding-bottom: 2rem; +} + +/* =========================== + MENU +=========================== */ +section[data-testid="stSidebar"] div[data-testid="stSidebarNav"] span { + font-size: 20px !important; +} +section[data-testid="stSidebar"] div[data-testid="stSidebarNav"] svg { + width: 1.5em !important; + height: 1.5em !important; +} + +/* =========================== + INPUTS E SELETTORI +=========================== */ +.stTextInput input, +.stNumberInput input, +.stTextArea textarea, +.stSelectbox div[data-baseweb="select"], +.stMultiselect div[data-baseweb="select"] { + border-radius: 8px !important; + transition: all 0.3s ease !important; + width: 100% !important; + box-sizing: border-box !important; +} +.stTextInput input:focus, +.stNumberInput input:focus, +.stTextArea textarea:focus { + box-shadow: 0 0 0 3px rgba(79, 106, 240, 0.2) !important; +} +.stMultiselect span[data-baseweb="tag"] { + max-width: 100%; + white-space: normal !important; + flex-wrap: wrap; + overflow-wrap: anywhere; +} +div[data-baseweb="select"] * { + max-width: 100%; + overflow-wrap: anywhere; + white-space: normal !important; + word-break: break-word; +} + +/* =========================== + BOTTONI +=========================== */ +.stButton > button { + background-color: #4fbdf0 !important; + color: white !important; + border-radius: 8px !important; + font-weight: 600 !important; + padding: 0.5rem 1rem !important; + transition: 
all 0.3s ease !important; + box-shadow: 0 2px 5px rgba(79, 106, 240, 0.2) !important; +} +.stButton > button:hover { + background-color: #3a9ee0 !important; + box-shadow: 0 3px 10px rgba(79, 106, 240, 0.4) !important; + transform: translateY(-1px) !important; +} +.stFormSubmitButton > button { + background-color: #4fbdf0 !important; + color: white !important; + border-radius: 8px !important; + font-weight: 600 !important; + padding: 0.5rem 1rem !important; + transition: all 0.3s ease !important; + box-shadow: 0 2px 5px rgba(79, 106, 240, 0.2) !important; +} +.stFormSubmitButton > button:hover { + background-color: #3a9ee0 !important; + box-shadow: 0 3px 10px rgba(79, 106, 240, 0.4) !important; + transform: translateY(-1px) !important; +} +.st-emotion-cache-15yv26i > button { + background-color: #4fbdf0 !important; + color: white !important; + border-radius: 8px !important; + font-weight: 600 !important; + padding: 0.5rem 1rem !important; + transition: all 0.3s ease !important; + box-shadow: 0 2px 5px rgba(79, 106, 240, 0.2) !important; +} +.st-emotion-cache-15yv26i > button:hover { + background-color: #3a9ee0 !important; + box-shadow: 0 3px 10px rgba(79, 106, 240, 0.4) !important; + transform: translateY(-1px) !important; +} + +/* =========================== + CHECKBOX & RADIO +=========================== */ +.stCheckbox label, +.stRadio label { + font-weight: 400 !important; +} +.stCheckbox > div[role="radiogroup"] > label > div:first-child, +.stRadio > div[role="radiogroup"] > label > div:first-child { + border-color: #C0C9F1; +} + +/* =========================== + TABS +=========================== */ +.stTabs [data-baseweb="tab-list"] { + gap: 0.5rem; +} +.stTabs [data-baseweb="tab"] { + background-color: #EAEEFF !important; + color: #333333 !important; + border-radius: 8px 8px 0 0 !important; + padding: 0.5rem 1rem !important; + border-bottom: none !important; +} +.stTabs [aria-selected="true"] { + font-weight: 600 !important; + border-top: 2px solid #4F6AF0 
!important;
+}
+
+/* ===========================
+   CARD & SEZIONI
+=========================== */
+.shadow-card {
+    border-radius: 10px;
+    padding: 1.5rem;
+    margin-bottom: 1.5rem;
+    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
+}
+.page-header {
+    margin-bottom: 1.5rem;
+    padding: 1.5rem;
+    border-radius: 12px;
+    box-shadow: 0 4px 12px rgba(79, 106, 240, 0.1);
+    border-left: 5px solid #4F6AF0;
+}
+.page-title {
+    font-size: 2rem;
+    font-weight: bold;
+    margin-bottom: 0.5rem;
+}
+.page-description {
+    font-size: 1.1rem;
+    margin-bottom: 0.5rem;
+}
+hr.header-divider {
+    margin-top: 1rem;
+    margin-bottom: 2rem;
+    border: none;
+    height: 1px;
+    background: linear-gradient(to right, #4F6AF0, rgba(79, 106, 240, 0.1));
+}
+.section-title {
+    font-size: 1.3rem;
+    font-weight: 600;
+    margin-top: 1.5rem;
+    margin-bottom: 1rem;
+    padding-bottom: 0.5rem;
+    border-bottom: 2px solid rgba(79, 106, 240, 0.2);
+}
+
+/* ===========================
+   FEATURE BOXES
+=========================== */
+.feature-box {
+    border-radius: 12px;
+    padding: 25px;
+    margin-bottom: 25px;
+    border-top: 4px solid #4F6AF0;
+    box-shadow: 0 6px 18px rgba(79, 106, 240, 0.1);
+    transition: all 0.3s ease;
+}
+.feature-box:hover {
+    transform: translateY(-5px);
+    box-shadow: 0 10px 25px rgba(79, 106, 240, 0.15);
+}
+.feature-title {
+    font-size: 1.3rem;
+    font-weight: 600;
+    margin-bottom: 15px;
+    display: flex;
+    align-items: center;
+}
+.feature-description {
+    font-size: 1rem;
+    line-height: 1.5;
+}
+.icon-large {
+    font-size: 2rem;
+    margin-right: 0.75rem;
+    width: 50px;
+    height: 50px;
+    line-height: 50px;
+    text-align: center;
+    border-radius: 50%;
+    box-shadow: 0 4px 10px rgba(79, 106, 240, 0.1);
+}
+
+/* ===========================
+   WELCOME SECTION
+=========================== */
+.welcome-section {
+    margin-bottom: 2.5rem;
+    padding: 2rem;
+    border-radius: 12px;
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
+    border-left: 5px solid #4F6AF0;
+    /* NOTE(review): the bare custom-property name `--backgroundColor` is invalid
+       CSS, so this declaration was silently dropped by the browser; wrapped in
+       var(). Confirm that --backgroundColor is actually defined somewhere,
+       otherwise remove this declaration entirely. */
+    background-color: var(--backgroundColor) !important;
+}
+.welcome-title { + font-size: 2.2rem; + font-weight: bold; + margin-bottom: 1rem; +} +.subtitle { + font-size: 1.3rem; + line-height: 1.6; + margin-bottom: 1.5rem; +} +.getting-started { + padding: 2rem; + border-radius: 12px; + margin-top: 2rem; + margin-bottom: 2rem; + box-shadow: 0 6px 18px rgba(0,0,0,0.05); + border-left: 5px solid #4F6AF0; +} +.getting-started h3 { + margin-bottom: 1rem; +} +.getting-started ol { + padding-left: 1.5rem; +} +.getting-started li { + margin-bottom: 0.75rem; + line-height: 1.6; +} + + diff --git a/views/ui_utils.py b/views/ui_utils.py new file mode 100644 index 0000000..d1adb95 --- /dev/null +++ b/views/ui_utils.py @@ -0,0 +1,13 @@ +import logging + +from .style_utils import add_global_styles, add_page_header, add_section_title +from .component_utils import create_card, create_metrics_container +logger = logging.getLogger(__name__) + +__all__ = [ + "add_global_styles", + "add_page_header", + "add_section_title", + "create_card", + "create_metrics_container", +] diff --git a/views/visualizza_risultati.py b/views/visualizza_risultati.py new file mode 100644 index 0000000..4ebf04b --- /dev/null +++ b/views/visualizza_risultati.py @@ -0,0 +1,393 @@ +import logging + +import streamlit as st +import pandas as pd +import json +import plotly.express as px +import plotly.graph_objects as go + +from controllers import ( + import_results_action, + calculate_statistics, + load_sets, + get_results, + list_set_names, + list_model_names, + prepare_select_options, +) +# from views import register_page +from views.style_utils import add_page_header, add_section_title +logger = logging.getLogger(__name__) + + +# @register_page("Visualizzazione Risultati") +def render(): + add_page_header( + "Visualizzazione Risultati Test", + icon="📊", + description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." 
+    )
+
+    # Load results via the cached accessor; seed session state on first run.
+    if 'results' not in st.session_state:
+        st.session_state.results = get_results(None, None)
+    if st.session_state.results.empty:
+        st.warning("Nessun risultato di test disponibile. Esegui prima alcuni test dalla pagina 'Esecuzione Test'.")
+        st.stop()
+
+    # Load question sets via the cached accessor.
+    if 'question_sets' not in st.session_state:
+        st.session_state.question_sets = load_sets()
+
+    # Session-state flags used to surface import success/error messages
+    # across the rerun triggered by the import button's callback.
+    if 'import_results_success' not in st.session_state:
+        st.session_state.import_results_success = False
+    if 'import_results_error' not in st.session_state:
+        st.session_state.import_results_error = False
+    if 'import_results_message' not in st.session_state:
+        st.session_state.import_results_message = ""
+
+    # Show any pending import message once, then clear the flag.
+    if st.session_state.import_results_success:
+        st.success(st.session_state.import_results_message)
+        st.session_state.import_results_success = False
+    if st.session_state.import_results_error:
+        st.error(st.session_state.import_results_message)
+        st.session_state.import_results_error = False
+
+    def import_results_callback():
+        """Callback to import results from a JSON file.
+
+        Reads the uploaded file from ``st.session_state.uploaded_results_file``,
+        delegates parsing to ``import_results_action``, stores the resulting
+        DataFrame in ``st.session_state.results``, and records a success or
+        error message for display on the next rerun. The uploader state is
+        cleared afterwards.
+        """
+        if (
+            'uploaded_results_file' in st.session_state
+            and st.session_state.uploaded_results_file is not None
+        ):
+            try:
+                results_df, message = import_results_action(
+                    st.session_state.uploaded_results_file
+                )
+                st.session_state.import_results_message = message
+                st.session_state.import_results_success = True
+                st.session_state.import_results_error = False
+                st.session_state.results = results_df
+            except Exception as exc:  # noqa: BLE001
+                st.session_state.import_results_message = str(exc)
+                st.session_state.import_results_success = False
+                st.session_state.import_results_error = True
+            # Clear the upload so the same file is not re-imported on rerun.
+            # NOTE(review): assigning to 'upload_results' (a widget key) may
+            # raise in recent Streamlit versions — confirm against the
+            # Streamlit release in requirements.txt.
+            st.session_state.uploaded_results_file = None
+            st.session_state.upload_results = None
+
+    # Filters by question set and by LLM model.
+    all_set_names = list_set_names(st.session_state.results,
+                                   st.session_state.question_sets)
+    all_model_names = list_model_names(st.session_state.results)
+
+    selected_set_filter = st.selectbox(
+        "Filtra per Set",
+        options=["Tutti"] + all_set_names,
+        index=0,
+        key="filter_set_name"
+    )
+
+    selected_model_filter = st.selectbox(
+        "Filtra per Modello LLM usato per la generazione della risposta",
+        options=["Tutti"] + all_model_names,
+        index=0,
+        key="filter_model_name"
+    )
+
+    # "Tutti" (= all) maps to None, i.e. no filtering on that dimension.
+    filtered_results_df = get_results(
+        None if selected_set_filter == "Tutti" else selected_set_filter,
+        None if selected_model_filter == "Tutti" else selected_model_filter,
+    )
+
+    result_options = prepare_select_options(
+        filtered_results_df, st.session_state.question_sets
+    )
+
+    # Select the test result to display.
+    selected_result_id = st.selectbox(
+        "Seleziona un Risultato del Test da Visualizzare",
+        options=list(result_options.keys()),
+        format_func=lambda x: result_options[x],
+        index=0 if result_options else None,
+        key="select_test_result_to_view"
+    )
+
+    # Optionally pick a second result to compare against; the currently
+    # selected result is removed so a test is never compared with itself.
+    compare_options = [rid for rid in result_options.keys() if rid != selected_result_id]
+    compare_result_id = st.selectbox(
+        "Confronta con un altro risultato (opzionale)",
+        options=[None] + compare_options,
+        format_func=lambda x: "Nessun confronto" if x is None else result_options[x],
+        index=0,
+        key="select_test_result_compare"
+    )
+    if not selected_result_id:
+        st.info("Nessun risultato selezionato o disponibile.")
+        st.stop()
+
+    # Fetch the selected result's row and resolve its set name.
+    selected_result_row = filtered_results_df[
+        filtered_results_df['id'] == selected_result_id
+    ].iloc[0]
+    result_data = selected_result_row['results']
+    set_name_map = {
+        str(row['id']): row['name']
+        for row in st.session_state.question_sets.to_dict('records')
+    }
+    set_name = set_name_map.get(str(selected_result_row['set_id']),
+                                'Set Sconosciuto')
+    questions_results = result_data.get('questions', {})
+
+    with st.expander("Esporta/Importa Risultati"):
+        col_exp, col_imp = st.columns(2)
+        with col_exp:
+            # Export the currently selected result as a standalone JSON doc.
+            selected_json = json.dumps({
+                'id': selected_result_row['id'],
+                'set_id': selected_result_row['set_id'],
+                'timestamp': selected_result_row['timestamp'],
+                'results': result_data
+            }, indent=2)
+            selected_filename = st.text_input(
+                "Nome file per export risultato selezionato",
+                value=f"result_{selected_result_row['id']}.json",
+                key="selected_result_filename",
+            )
+            # Normalize the user-supplied filename to a .json extension.
+            if selected_filename and not selected_filename.endswith(".json"):
+                selected_filename += ".json"
+            st.download_button(
+                "Export Risultato Selezionato",
+                selected_json,
+                file_name=selected_filename,
+                mime="application/json"
+            )
+
+            all_filename = st.text_input(
+                "Nome file per export tutti i risultati",
+                value="all_results.json",
+                key="all_results_filename",
+            )
+            if all_filename and not all_filename.endswith(".json"):
+                all_filename += ".json"
+            # Export every cached result, not just the filtered view.
+            all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2)
+            st.download_button(
+                "Export Tutti i Risultati",
+                all_json,
+                file_name=all_filename,
+                mime="application/json"
+            )
+
+        with col_imp:
+            uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results")
+            if uploaded_file is not None:
+                st.session_state.uploaded_results_file = uploaded_file
+            st.button(
+                "Importa Risultati",
+                on_click=import_results_callback,
+                key="import_results_btn"
+            )
+
+    # Load the comparison result, if one was chosen.
+    compare_result_row = None
+    compare_result_data = None
+    compare_questions_results = {}
+    if compare_result_id:
+        compare_result_row = filtered_results_df[
+            filtered_results_df['id'] == compare_result_id
+        ].iloc[0]
+        compare_result_data = compare_result_row['results']
+        compare_questions_results = compare_result_data.get('questions', {})
+
+    # General information about the selected result.
+    evaluation_method = result_data.get('method', 'LLM')
+    method_icon = "🤖" if evaluation_method == "LLM" else "📊"
+    method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto"
+
+    add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄")
+    st.markdown(f"**ID Risultato:** `{selected_result_id}`")
+    st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}")
+    st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**")
+
+    # Show which LLM or preset produced the answers / evaluations, if recorded.
+    if 'generation_llm' in result_data:
+        st.markdown(f"**LLM Generazione Risposte:** `{result_data['generation_llm']}`")
+    elif 'generation_preset' in result_data:
+        st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`")
+    if 'evaluation_llm' in result_data:
+        st.markdown(f"**LLM Valutazione Risposte:** `{result_data['evaluation_llm']}`")
+    elif 'evaluation_preset' in result_data:
+        st.markdown(
+            f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`"
+        )
+
+    # Overall test metrics.
+    add_section_title("Metriche Generali del Test", icon="📈")
+
+    if questions_results:
+        stats = calculate_statistics(questions_results)
+        avg_score_overall = stats["avg_score"]
+        num_questions = len(stats["per_question_scores"])
+
+        cols_metrics = st.columns(2)
+        with cols_metrics[0]:
+            st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%")
+        with cols_metrics[1]:
+            st.metric("Numero di Domande Valutate", num_questions)
+
+        # Side-by-side average scores when a comparison result is present.
+        compare_stats = None
+        if compare_result_row is not None:
+            compare_stats = calculate_statistics(compare_questions_results)
+            compare_avg = compare_stats["avg_score"]
+            diff_avg = compare_avg - avg_score_overall
+            st.markdown("### Confronto")
+            cols_cmp = st.columns(3)
+            cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%")
+            cols_cmp[1].metric("Punteggio Confronto", f"{compare_avg:.2f}%")
+            cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%")
+
+        # Build the per-question bar-chart dataset; labels are truncated to
+        # 50 characters so the x-axis stays readable.
+        scores_data = []
+        for item in stats["per_question_scores"]:
+            label = item["question"]
+            label = label[:50] + "..." if len(label) > 50 else label
+            scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Selezionato"})
+        if compare_stats:
+            for item in compare_stats["per_question_scores"]:
+                label = item["question"]
+                label = label[:50] + "..." if len(label) > 50 else label
+                scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Confronto"})
+
+        if scores_data:
+            df_scores = pd.DataFrame(scores_data)
+            unique_questions = len({d['Domanda'] for d in scores_data})
+            fig = px.bar(
+                df_scores,
+                x='Domanda',
+                y='Punteggio',
+                color='Tipo',
+                barmode='group',
+                title="Punteggi per Domanda",
+                # Grow the chart with the number of questions (min 400 px).
+                height=max(400, unique_questions * 30),
+            )
+            fig.update_layout(yaxis_range=[0, 100])
+            st.plotly_chart(fig, use_container_width=True)
+
+        # Radar chart over the three LLM evaluation metrics.
+        categories = ['Somiglianza', 'Correttezza', 'Completezza']
+        fig_radar = go.Figure()
+        rm = stats["radar_metrics"]
+        fig_radar.add_trace(
+            go.Scatterpolar(
+                r=[rm['similarity'], rm['correctness'], rm['completeness']],
+                theta=categories,
+                fill='toself',
+                name='Selezionato',
+            )
+        )
+        if compare_stats:
+            crm = compare_stats["radar_metrics"]
+            fig_radar.add_trace(
+                go.Scatterpolar(
+                    r=[crm['similarity'], crm['correctness'], crm['completeness']],
+                    theta=categories,
+                    fill='toself',
+                    name='Confronto',
+                )
+            )
+        fig_radar.update_layout(
+            title="Grafico Radar delle Metriche LLM",
+            polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
+            showlegend=True,
+            legend=dict(
+                orientation="h",
+                yanchor="bottom",
+                y=-0.2,
+                xanchor="center",
+                x=0.5,
+            ),
+            height=600,
+        )
+        st.plotly_chart(fig_radar, use_container_width=True)
+
+        st.subheader("Valori medi delle metriche")
+        cols = st.columns(3)
+        cols[0].metric("Somiglianza", f"{rm['similarity']:.2f}%")
+        cols[1].metric("Correttezza", f"{rm['correctness']:.2f}%")
+        cols[2].metric("Completezza", f"{rm['completeness']:.2f}%")
+
+        if compare_stats:
+            cols_cmp = st.columns(3)
+            cols_cmp[0].metric("Somiglianza (Confronto)", f"{crm['similarity']:.2f}%")
+            cols_cmp[1].metric("Correttezza (Confronto)", f"{crm['correctness']:.2f}%")
+            cols_cmp[2].metric("Completezza (Confronto)", f"{crm['completeness']:.2f}%")
+    else:
+        st.info("Nessun dettaglio per le domande disponibile in questo risultato.")
+
+    # Per-question comparison table over the union of question ids.
+    if compare_result_row is not None:
+        add_section_title("Confronto Dettagliato per Domanda", icon="🔍")
+        comparison_rows = []
+        all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys())
+        for qid in all_q_ids:
+            q1 = questions_results.get(qid, {})
+            q2 = compare_questions_results.get(qid, {})
+            label = q1.get('question') or q2.get('question') or str(qid)
+            score1 = q1.get('evaluation', {}).get('score', None)
+            score2 = q2.get('evaluation', {}).get('score', None)
+            # Delta is only meaningful when both results scored the question.
+            delta = None
+            if score1 is not None and score2 is not None:
+                delta = score2 - score1
+            comparison_rows.append({
+                'Domanda': label[:50] + ('...' if len(label) > 50 else ''),
+                'Selezionato': score1,
+                'Confronto': score2,
+                'Delta': delta
+            })
+        if comparison_rows:
+            df_comp = pd.DataFrame(comparison_rows)
+            st.dataframe(df_comp)
+
+    # Detailed breakdown for each question.
+    add_section_title("Risultati Dettagliati per Domanda", icon="📝")
+    if not questions_results:
+        st.info("Nessuna domanda trovata in questo set di risultati.")
+    else:
+        for q_id, q_data in questions_results.items():
+            question_text = q_data.get('question', "Testo domanda non disponibile")
+            expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile")
+            actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile")
+
+            with st.expander(
+                f"Domanda: {question_text[:100]}..."
+            ):
+                st.markdown(f"**Domanda:** {question_text}")
+                st.markdown(f"**Risposta Attesa:** {expected_answer}")
+                st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}")
+                st.divider()
+                evaluation = q_data.get(
+                    'evaluation', {}
+                )  # Ensure evaluation is always a dictionary
+                st.markdown("##### Valutazione LLM")
+                score = evaluation.get('score', 0)
+                explanation = evaluation.get(
+                    'explanation', "Nessuna spiegazione."
+                )
+                similarity = evaluation.get('similarity', 0)
+                correctness = evaluation.get('correctness', 0)
+                completeness = evaluation.get('completeness', 0)
+
+                st.markdown(f"**Punteggio Complessivo:** {score:.2f}%")
+                st.markdown(f"**Spiegazione:** {explanation}")
+
+                cols_eval_metrics = st.columns(3)
+                cols_eval_metrics[0].metric(
+                    "Somiglianza", f"{similarity:.2f}%"
+                )
+                cols_eval_metrics[1].metric(
+                    "Correttezza", f"{correctness:.2f}%"
+                )
+                cols_eval_metrics[2].metric(
+                    "Completezza", f"{completeness:.2f}%"
+                )
+
+                st.markdown("--- --- ---")
+
+
+# NOTE(review): both branches call render(), so the conditional is redundant —
+# the page renders unconditionally on import. If that is intentional (Streamlit
+# pages execute as scripts), a bare render() call would be clearer; confirm
+# with how app.py loads this view before simplifying.
+if __name__ == "__main__":
+    render()
+else:
+    render()