From 0559f9d84cc4504704131a9df498780843a87b24 Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 1 Aug 2025 11:03:40 +0200 Subject: [PATCH 01/41] first commit --- .github/workflows/ci.yml | 38 ++ Dockerfile | 6 + app.py | 333 +++++++++++++++++ controllers/__init__.py | 0 controllers/api_preset_controller.py | 14 + controllers/openai_controller.py | 24 ++ controllers/question_controller.py | 86 +++++ controllers/question_set_controller.py | 212 +++++++++++ controllers/test_controller.py | 63 ++++ db.config.example | 14 + docker-compose.yml | 32 ++ initialize_db.py | 22 ++ logging_config.py | 10 + models/__init__.py | 0 models/api_preset.py | 47 +++ models/data/api_presets.csv | 4 + models/data/basic_math.json | 102 ++++++ models/data/capital_cities.json | 102 ++++++ models/data/question_sets.csv | 3 + models/data/questions.csv | 60 +++ models/data/test_results.csv | 2 + models/db_utils.py | 96 +++++ models/openai_service.py | 285 +++++++++++++++ models/question.py | 65 ++++ models/question_set.py | 56 +++ models/test_result.py | 57 +++ pytest.ini | 2 + requirements.txt | 10 + tests/conftest.py | 17 + tests/test_placeholder.py | 4 + tests/test_question_controller.py | 18 + tests/test_question_set_controller.py | 22 ++ view/api_configurazione.py | 266 ++++++++++++++ view/component_utils.py | 177 +++++++++ view/esecuzione_test.py | 239 ++++++++++++ view/gestione_domande.py | 292 +++++++++++++++ view/gestione_set.py | 379 +++++++++++++++++++ view/session_state.py | 12 + view/set_helpers.py | 119 ++++++ view/style_utils.py | 187 ++++++++++ view/ui_utils.py | 10 + view/visualizza_risultati.py | 487 +++++++++++++++++++++++++ 42 files changed, 3974 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 Dockerfile create mode 100644 app.py create mode 100644 controllers/__init__.py create mode 100644 controllers/api_preset_controller.py create mode 100644 controllers/openai_controller.py create mode 100644 controllers/question_controller.py create mode 100644 controllers/question_set_controller.py create mode 100644 controllers/test_controller.py create mode 100644 db.config.example create mode 100644 docker-compose.yml create mode 100644 initialize_db.py create mode 100644 logging_config.py create mode 100644 models/__init__.py create mode 100644 models/api_preset.py create mode 100644 models/data/api_presets.csv create mode 100644 models/data/basic_math.json create mode 100644 models/data/capital_cities.json create mode 100644 models/data/question_sets.csv create mode 100644 models/data/questions.csv create mode 100644 models/data/test_results.csv create mode 100644 models/db_utils.py create mode 100644 models/openai_service.py create mode 100644 models/question.py create mode 100644 models/question_set.py create mode 100644 models/test_result.py create mode 100644 pytest.ini create mode 100644 requirements.txt create mode 100644 tests/conftest.py create mode 100644 tests/test_placeholder.py create mode 100644 tests/test_question_controller.py create mode 100644 tests/test_question_set_controller.py create mode 100644 view/api_configurazione.py create mode 100644 view/component_utils.py create mode 100644 view/esecuzione_test.py create mode 100644 view/gestione_domande.py create mode 100644 view/gestione_set.py create mode 100644 view/session_state.py create mode 100644 view/set_helpers.py create mode 100644 view/style_utils.py create mode 100644 view/ui_utils.py create mode 100644 view/visualizza_risultati.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml 
new file mode 100644 index 0000000..f5c588f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,38 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + services: + mysql: + image: mysql:8.0 + env: + MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' + MYSQL_ROOT_HOST: '%' + MYSQL_DATABASE: llm_platform + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + - name: Configure database for tests + run: sed -i 's/host=db/host=127.0.0.1/' db.config.example + - name: Run tests + run: pytest -v diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d777b03 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.11-slim +WORKDIR /app +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt +COPY . . +CMD ["streamlit", "run", "app.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/app.py b/app.py new file mode 100644 index 0000000..91f5e3b --- /dev/null +++ b/app.py @@ -0,0 +1,333 @@ +import streamlit as st +import os +import importlib +import sys +import logging +from logging_config import setup_logging + +setup_logging() +logging.info("Applicazione avviata") + +from models.db_utils import init_db +from controllers.question_controller import load_questions +from controllers.question_set_controller import load_sets +from controllers.test_controller import load_results + +# Imposta la configurazione della pagina +st.set_page_config( + page_title="LLM Test Evaluation Platform", + page_icon="🧠", + layout="wide", + initial_sidebar_state="expanded" +) + +# Inizializza lo stato della sessione +if 'initialized' not in st.session_state: + st.session_state.initialized = False + +# Inizializza i file di dati se non esistono +if not st.session_state.initialized: + init_db() + st.session_state.initialized = True + +# Carica i dati nello stato della sessione se non sono già caricati +if 'questions' not in st.session_state: + st.session_state.questions = load_questions() + +if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + +if 'results' not in st.session_state: + st.session_state.results = load_results() + +# Configurazione API +if 'api_key' not in st.session_state: + st.session_state.api_key = os.environ.get('OPENAI_API_KEY', '') + +if 'endpoint' not in st.session_state: + st.session_state.endpoint = 'https://api.openai.com/v1' + +if 'model' not in st.session_state: + st.session_state.model = 'gpt-4o' + +if 'temperature' not in st.session_state: + st.session_state.temperature = 0.0 + +if 'max_tokens' not in st.session_state: + st.session_state.max_tokens = 1000 + +# Applicazione principale +st.title("🧠 LLM Test Evaluation Platform - Artificial QI") + +# Importa utilità UI +from view.style_utils import add_global_styles, add_page_header + +# Aggiungi CSS personalizzato e stili globali +add_global_styles() + +# Definisce le pagine disponibili e il menu laterale +PAGES = { + "Home": None, + "Configurazione API": "view.api_configurazione", + "Gestione Domande": "view.gestione_domande", + "Gestione Set di Domande": "view.gestione_set", + "Esecuzione Test": "view.esecuzione_test", + "Visualizzazione Risultati": "view.visualizza_risultati", +} + 
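+# Each entry maps a sidebar label to the dotted path of a view module; "Home" has no
+# module and is rendered by show_home_page() below, while the other pages are imported
+# (or reloaded on subsequent selections) by the dispatcher at the bottom of this file.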
+selected_page = st.sidebar.radio("Navigazione", list(PAGES.keys()))
+
+
+# CSS Estremo per Visibilità Input in Tema Scuro
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+
+def show_home_page():
+    """Visualizza la pagina principale con le funzionalità della piattaforma."""
+
+    st.markdown(
+        """
+## 🧠 Piattaforma di Valutazione LLM
+
+Una piattaforma completa per valutare le risposte LLM con diversi provider AI
+""",
+        unsafe_allow_html=True,
+    )
+
+    # Box delle funzionalità con icone e stile migliorato
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown(
+            """
+### 📋 Gestione delle Domande
+
+Crea, modifica e organizza le tue domande di test con le risposte previste.
+Costruisci set di test completi per valutare le risposte LLM in modo efficiente.
+
+### 🔌 Supporto Multi-Provider API
+
+Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello.
+Configura parametri API e verifica le connessioni con feedback in tempo reale.
+            """,
+            unsafe_allow_html=True,
+        )
+
+    with col2:
+        st.markdown(
+            """
+### 🧪 Valutazione Automatizzata
+
+Esegui test con punteggio automatico rispetto alle risposte previste.
+Valuta la somiglianza semantica tra testi con modelli linguistici.
+
+### 📊 Analisi Avanzata
+
+Visualizza i risultati dei test con grafici interattivi e metriche dettagliate.
+Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici.
+            """,
+            unsafe_allow_html=True,
+        )
+
+    st.markdown(
+        """
+### 🚀 Iniziare
+
+1. Configura le tue credenziali API nella pagina Configurazione API
+2. Crea domande e risposte previste nella pagina Gestione Domande
+3. Organizza le domande in set nella pagina Gestione Set di Domande
+4. Esegui valutazioni nella pagina Esecuzione Test
+5. Visualizza e analizza i risultati nella pagina Visualizzazione Risultati
+
+Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.
+""", + unsafe_allow_html=True, + ) + + +if selected_page == "Home": + show_home_page() +else: + module_name = PAGES[selected_page] + if module_name in sys.modules: + importlib.reload(sys.modules[module_name]) + else: + importlib.import_module(module_name) diff --git a/controllers/__init__.py b/controllers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py new file mode 100644 index 0000000..a153660 --- /dev/null +++ b/controllers/api_preset_controller.py @@ -0,0 +1,14 @@ +import pandas as pd +from models.api_preset import APIPreset + + +def load_presets() -> pd.DataFrame: + return APIPreset.load_all() + + +def save_presets(df: pd.DataFrame) -> None: + APIPreset.save_df(df) + + +def delete_preset(preset_id: str) -> None: + APIPreset.delete(preset_id) diff --git a/controllers/openai_controller.py b/controllers/openai_controller.py new file mode 100644 index 0000000..42318c7 --- /dev/null +++ b/controllers/openai_controller.py @@ -0,0 +1,24 @@ +from models.openai_service import ( + evaluate_answer as _evaluate_answer, + generate_example_answer_with_llm as _generate_example_answer_with_llm, + test_api_connection as _test_api_connection, + DEFAULT_MODEL, + DEFAULT_ENDPOINT, +) + + +def evaluate_answer(question: str, expected_answer: str, actual_answer: str, + client_config: dict, show_api_details: bool = False): + return _evaluate_answer(question, expected_answer, actual_answer, + client_config, show_api_details) + + +def generate_example_answer_with_llm(question: str, client_config: dict, + show_api_details: bool = False): + return _generate_example_answer_with_llm(question, client_config, show_api_details) + + +def test_api_connection(api_key: str, endpoint: str, model: str, + temperature: float, max_tokens: int): + return _test_api_connection(api_key, endpoint, model, temperature, max_tokens) + diff --git a/controllers/question_controller.py b/controllers/question_controller.py new file mode 100644 index 0000000..0f70466 --- /dev/null +++ b/controllers/question_controller.py @@ -0,0 +1,86 @@ +from typing import Optional, Tuple +import pandas as pd +import os +import json +import uuid +from models.question import Question + + +def load_questions() -> pd.DataFrame: + return Question.load_all() + + +def add_question(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: + return Question.add(domanda, risposta_attesa, categoria, question_id) + + +def update_question(question_id: str, domanda: Optional[str] = None, risposta_attesa: Optional[str] = None, categoria: Optional[str] = None) -> None: + Question.update(question_id, domanda, risposta_attesa, categoria) + + +def delete_question(question_id: str) -> None: + Question.delete(question_id) + + +def add_question_if_not_exists(question_id: str, domanda: str, risposta_attesa: str, categoria: str = "") -> bool: + df = Question.load_all() + if str(question_id) in df['id'].astype(str).values: + return False + Question.add(domanda, risposta_attesa, categoria, question_id) + return True + + +def import_questions_from_file(file) -> Tuple[bool, str]: + """Importa domande da un file CSV o JSON.""" + try: + file_extension = os.path.splitext(file.name)[1].lower() + imported_df = None + + if file_extension == '.csv': + imported_df = pd.read_csv(file) + elif file_extension == '.json': + data = json.load(file) + if isinstance(data, list): + imported_df = pd.DataFrame(data) + elif isinstance(data, dict) and 'questions' in 
data and isinstance(data['questions'], list): + imported_df = pd.DataFrame(data['questions']) + else: + return False, "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." + else: + return False, "Formato file non supportato. Caricare un file CSV o JSON." + + if imported_df is None or imported_df.empty: + return False, "Il file importato è vuoto o non contiene dati validi." + + if 'question' in imported_df.columns and 'domanda' not in imported_df.columns: + imported_df.rename(columns={'question': 'domanda'}, inplace=True) + if 'expected_answer' in imported_df.columns and 'risposta_attesa' not in imported_df.columns: + imported_df.rename(columns={'expected_answer': 'risposta_attesa'}, inplace=True) + + required_columns = ['domanda', 'risposta_attesa'] + if not all(col in imported_df.columns for col in required_columns): + return False, f"Il file importato deve contenere le colonne '{required_columns[0]}' e '{required_columns[1]}'." + + if 'id' not in imported_df.columns: + imported_df['id'] = [str(uuid.uuid4()) for _ in range(len(imported_df))] + else: + imported_df['id'] = imported_df['id'].astype(str) + + if 'categoria' not in imported_df.columns: + imported_df['categoria'] = "" + else: + imported_df['categoria'] = imported_df['categoria'].astype(str).fillna("") + + imported_df['domanda'] = imported_df['domanda'].astype(str).fillna("") + imported_df['risposta_attesa'] = imported_df['risposta_attesa'].astype(str).fillna("") + + final_imported_df = imported_df[['id', 'domanda', 'risposta_attesa', 'categoria']] + + added_count = 0 + for _, row in final_imported_df.iterrows(): + Question.add(row['domanda'], row['risposta_attesa'], row['categoria'], question_id=row['id']) + added_count += 1 + + return True, f"Importate con successo {added_count} domande." + except Exception as e: + return False, f"Errore durante l'importazione delle domande: {str(e)}" diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py new file mode 100644 index 0000000..413e6ab --- /dev/null +++ b/controllers/question_set_controller.py @@ -0,0 +1,212 @@ +from typing import List, Optional, Any, Dict +import pandas as pd +import json +import os +from models.question_set import QuestionSet +from controllers.question_controller import ( + add_question_if_not_exists, + load_questions, +) + + +def load_sets() -> pd.DataFrame: + return QuestionSet.load_all() + + +def create_set(name: str, question_ids: Optional[List[str]] = None) -> str: + return QuestionSet.create(name, question_ids) + + +def update_set(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: + QuestionSet.update(set_id, name, question_ids) + + +def delete_set(set_id: str) -> None: + QuestionSet.delete(set_id) + + +def import_sets_from_file(uploaded_file) -> Dict[str, Any]: + """Importa uno o più set di domande da un file JSON o CSV.""" + result: Dict[str, Any] = { + "success": False, + "success_message": "", + "error_message": "", + "questions_df": None, + "sets_df": None, + "warnings": [], + } + + if uploaded_file is None: + result["error_message"] = "Nessun file fornito per l'importazione." 
+ return result + + try: + file_extension = os.path.splitext(uploaded_file.name)[1].lower() + + if file_extension == ".csv": + df = pd.read_csv(uploaded_file) + required_cols = ["name", "id", "domanda", "risposta_attesa", "categoria"] + missing = [c for c in required_cols if c not in df.columns] + if missing: + raise ValueError( + "Il file CSV deve contenere le colonne " + ", ".join(required_cols) + ) + + sets_dict: Dict[str, List[Dict[str, str]]] = {} + for _, row in df.iterrows(): + name = str(row["name"]).strip() + if not name: + continue + question = { + "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", + "domanda": str(row["domanda"]).strip() if not pd.isna(row["domanda"]) else "", + "risposta_attesa": str(row["risposta_attesa"]).strip() if not pd.isna(row["risposta_attesa"]) else "", + "categoria": str(row["categoria"]).strip() if not pd.isna(row["categoria"]) else "", + } + sets_dict.setdefault(name, []).append(question) + + data = [{"name": n, "questions": qs} for n, qs in sets_dict.items()] + else: + string_data = uploaded_file.getvalue().decode("utf-8") + data = json.loads(string_data) + + current_questions = load_questions() + current_sets = load_sets() + + if not isinstance(data, list): + result["error_message"] = ( + "Formato JSON non valido. Il file deve contenere una lista (array) di set." + ) + return result + + sets_imported_count = 0 + new_questions_added_count = 0 + existing_questions_found_count = 0 + + for set_idx, set_data in enumerate(data): + if not isinstance(set_data, dict): + result["warnings"].append( + f"Elemento #{set_idx+1} nella lista non è un set valido (saltato)." + ) + continue + + set_name = set_data.get("name") + questions_in_set_data = set_data.get("questions", []) + + if not set_name or not isinstance(set_name, str) or not set_name.strip(): + result["warnings"].append( + f"Set #{set_idx+1} con nome mancante o non valido (saltato)." + ) + continue + + if not isinstance(questions_in_set_data, list): + result["warnings"].append( + f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." + ) + continue + + if set_name in current_sets["name"].values: + result["warnings"].append( + f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." + ) + continue + + current_set_question_ids: List[str] = [] + + for q_idx, q_data in enumerate(questions_in_set_data): + if isinstance(q_data, dict): + q_id = str(q_data.get("id", "")) + q_text = q_data.get("domanda", "") + q_answer = q_data.get("risposta_attesa", "") + q_category = q_data.get("categoria", "") + else: + q_id = str(q_data) + q_text = "" + q_answer = "" + q_category = "" + + if not q_id: + result["warnings"].append( + f"Domanda #{q_idx+1} nel set '{set_name}' senza ID (saltata)." 
+ ) + continue + + if q_text and q_answer: + if q_id in current_questions["id"].astype(str).values: + existing_questions_found_count += 1 + current_set_question_ids.append(q_id) + else: + was_added = add_question_if_not_exists( + question_id=q_id, + domanda=q_text, + risposta_attesa=q_answer, + categoria=q_category, + ) + if was_added: + new_questions_added_count += 1 + current_set_question_ids.append(q_id) + new_row = pd.DataFrame( + { + "id": [q_id], + "domanda": [q_text], + "risposta_attesa": [q_answer], + "categoria": [q_category], + } + ) + current_questions = pd.concat( + [current_questions, new_row], ignore_index=True + ) + else: + existing_questions_found_count += 1 + current_set_question_ids.append(q_id) + continue + else: + if q_id in current_questions["id"].astype(str).values: + existing_questions_found_count += 1 + current_set_question_ids.append(q_id) + else: + result["warnings"].append( + f"Domanda #{q_idx+1} con ID {q_id} nel set '{set_name}' non trovata e senza dettagli; saltata." + ) + + if current_set_question_ids or len(questions_in_set_data) == 0: + try: + create_set(set_name, current_set_question_ids) + sets_imported_count += 1 + except Exception as e: + result["warnings"].append( + f"Errore durante la creazione del set '{set_name}': {e}" + ) + else: + result["warnings"].append( + f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." + ) + + result["questions_df"] = load_questions() + result["sets_df"] = load_sets() + + if sets_imported_count > 0 or new_questions_added_count > 0: + success_parts = [] + if sets_imported_count > 0: + success_parts.append(f"{sets_imported_count} set importati") + if new_questions_added_count > 0: + success_parts.append(f"{new_questions_added_count} nuove domande aggiunte") + if existing_questions_found_count > 0: + success_parts.append( + f"{existing_questions_found_count} domande esistenti referenziate" + ) + + result["success"] = True + result["success_message"] = ". ".join(success_parts) + "." + else: + result["error_message"] = ( + "Nessun set o domanda valida trovata nel file per l'importazione." + ) + except json.JSONDecodeError: + result["error_message"] = ( + "Errore di decodifica JSON. Assicurati che il file sia un JSON valido." + ) + except Exception as e: + result["error_message"] = f"Errore imprevisto durante l'importazione: {str(e)}" + + return result diff --git a/controllers/test_controller.py b/controllers/test_controller.py new file mode 100644 index 0000000..05e91c6 --- /dev/null +++ b/controllers/test_controller.py @@ -0,0 +1,63 @@ +import pandas as pd +from typing import Dict, Tuple +import json +import uuid +from datetime import datetime +from models.test_result import TestResult + + +def load_results() -> pd.DataFrame: + return TestResult.load_all() + + +def add_result(set_id: str, results_data: Dict) -> str: + return TestResult.add(set_id, results_data) + + +def save_results(df: pd.DataFrame) -> None: + TestResult.save_df(df) + + +def import_results_from_file(file) -> Tuple[bool, str]: + """Importa risultati di test da un file JSON.""" + try: + data = json.load(file) + if isinstance(data, dict): + data = [data] + if not isinstance(data, list): + return False, "Il file JSON deve contenere un oggetto o una lista di risultati." 
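+        # Each result item is expected to carry 'id', 'set_id', 'timestamp' and a 'results' dict;
+        # missing fields fall back to a generated UUID, an empty set_id and the current time.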
+ + results_df = load_results() + added_count = 0 + + for item in data: + if not isinstance(item, dict): + continue + + result_id = str(item.get('id', uuid.uuid4())) + if result_id in results_df['id'].astype(str).values: + continue + + set_id = str(item.get('set_id', '')) + timestamp = str(item.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))) + results_content = item.get('results', {}) + + new_row = { + 'id': result_id, + 'set_id': set_id, + 'timestamp': timestamp, + 'results': results_content if isinstance(results_content, dict) else {} + } + + results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True) + added_count += 1 + + if added_count > 0: + save_results(results_df) + message = f"Importati {added_count} risultati." + else: + message = "Nessun nuovo risultato importato." + + return True, message + except Exception as e: + return False, f"Errore durante l'importazione dei risultati: {str(e)}" diff --git a/db.config.example b/db.config.example new file mode 100644 index 0000000..9d4ab66 --- /dev/null +++ b/db.config.example @@ -0,0 +1,14 @@ +[mysql] +host=localhost +user=root +password=your_password_here +database=llm_platform +port=3306 +ssl_ca= + +# For Docker environment, use: +# host=db +# user=root +# password= +# database=llm_platform +# port=3306 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0e4d95d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,32 @@ +services: + db: + image: mysql:8.0 + container_name: db + restart: always + ports: + - "3306:3306" + volumes: + - db_data:/var/lib/mysql + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' + MYSQL_ROOT_HOST: '%' + networks: + - llm-network + + app: + build: . + container_name: llm-app + ports: + - '8501:8501' + volumes: + - .:/app + depends_on: + - db + command: streamlit run app.py --server.port 8501 --server.address 0.0.0.0 + networks: + - llm-network + +volumes: + db_data: +networks: + llm-network: diff --git a/initialize_db.py b/initialize_db.py new file mode 100644 index 0000000..ea0622a --- /dev/null +++ b/initialize_db.py @@ -0,0 +1,22 @@ +import logging +from logging_config import setup_logging + +try: + from models.db_utils import init_db +except ModuleNotFoundError as exc: + logging.error( + "Modulo mancante. 
Installa le dipendenze con 'pip install -r requirements.txt'" + ) + logging.error(f"Errore specifico: {exc}") + raise exc + +if __name__ == '__main__': + setup_logging() + logging.info("Inizializzazione del database in corso...") + try: + init_db() + logging.info("Database inizializzato con successo!") + except Exception as e: + logging.error(f"Errore durante l'inizializzazione del database: {e}") + logging.exception("Traceback dettagliato:") + diff --git a/logging_config.py b/logging_config.py new file mode 100644 index 0000000..c03423d --- /dev/null +++ b/logging_config.py @@ -0,0 +1,10 @@ +import logging + + +def setup_logging(level: int = logging.INFO) -> None: + """Configure root logger with a basic format.""" + logging.basicConfig( + level=level, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/api_preset.py b/models/api_preset.py new file mode 100644 index 0000000..e1e1992 --- /dev/null +++ b/models/api_preset.py @@ -0,0 +1,47 @@ +from dataclasses import dataclass +from typing import Optional +import pandas as pd +from sqlalchemy import text + +from models.db_utils import get_engine + +@dataclass +class APIPreset: + id: str + name: str + provider_name: str + endpoint: str + api_key: str + model: str + temperature: float + max_tokens: int + + @staticmethod + def load_all() -> pd.DataFrame: + df = pd.read_sql("SELECT * FROM api_presets", get_engine()) + df['id'] = df['id'].astype(str) + return df + + @staticmethod + def save_df(df: pd.DataFrame) -> None: + engine = get_engine() + with engine.begin() as conn: + existing_ids = pd.read_sql('SELECT id FROM api_presets', conn)['id'].astype(str).tolist() + incoming_ids = df['id'].astype(str).tolist() + for del_id in set(existing_ids) - set(incoming_ids): + conn.execute(text('DELETE FROM api_presets WHERE id=:id'), {'id': del_id}) + for _, row in df.iterrows(): + # Convert NaN values from Pandas to None so that SQLAlchemy can + # correctly insert NULLs into the database instead of the string + # "nan" which would raise a ProgrammingError with MySQL. 
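+                # e.g. a preset saved without a provider_name reaches this point as NaN
+                # and is written to MySQL as NULL rather than the literal string "nan".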
+ params = {k: (None if pd.isna(v) else v) for k, v in row.to_dict().items()} + if row['id'] in existing_ids: + conn.execute(text('''UPDATE api_presets SET name=:name, provider_name=:provider_name, endpoint=:endpoint, api_key=:api_key, model=:model, temperature=:temperature, max_tokens=:max_tokens WHERE id=:id'''), params) + else: + conn.execute(text('''INSERT INTO api_presets (id, name, provider_name, endpoint, api_key, model, temperature, max_tokens) VALUES (:id, :name, :provider_name, :endpoint, :api_key, :model, :temperature, :max_tokens)'''), params) + + @staticmethod + def delete(preset_id: str) -> None: + engine = get_engine() + with engine.begin() as conn: + conn.execute(text('DELETE FROM api_presets WHERE id=:id'), {'id': preset_id}) diff --git a/models/data/api_presets.csv b/models/data/api_presets.csv new file mode 100644 index 0000000..355c736 --- /dev/null +++ b/models/data/api_presets.csv @@ -0,0 +1,4 @@ +id,name,provider_name,endpoint,api_key,model,temperature,max_tokens +6ba759ec-d6ec-4942-a764-0fbb2180771d,test,nan,https://api.openai.com/v1,test,gpt-4o,0.0,1000 +fe0c7c11-a959-4627-8701-8bf33efb7501,12e12e,nan,https://api.openai.com/v1,qweqweqwe,gpt-4o,0.0,1000 +9813c48d-b32c-4f29-b63e-0bc9ed4f693e,1231,nan,https://api.openai.com/v1,nan,gpt-4o,0.0,1000 diff --git a/models/data/basic_math.json b/models/data/basic_math.json new file mode 100644 index 0000000..50b4991 --- /dev/null +++ b/models/data/basic_math.json @@ -0,0 +1,102 @@ +[ + { + "domanda": "Quanto fa 7 + 5?", + "risposta_attesa": "12", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 9 x 3?", + "risposta_attesa": "27", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il quadrato di 6?", + "risposta_attesa": "36", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 100 ÷ 4?", + "risposta_attesa": "25", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è la radice quadrata di 49?", + "risposta_attesa": "7", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 15 - 8?", + "risposta_attesa": "7", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il risultato di 5²?", + "risposta_attesa": "25", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 3 x (2 + 4)?", + "risposta_attesa": "18", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 10 % di 200?", + "risposta_attesa": "20", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il minimo comune multiplo di 4 e 6?", + "risposta_attesa": "12", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa (8 + 2) x 5?", + "risposta_attesa": "50", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 2³?", + "risposta_attesa": "8", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il massimo comune divisore di 18 e 24?", + "risposta_attesa": "6", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 0 x 154?", + "risposta_attesa": "0", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il reciproco di 2?", + "risposta_attesa": "1/2", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 1/4 + 1/2?", + "risposta_attesa": "3/4", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 0.1 + 0.2?", + "risposta_attesa": "0.3", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è il risultato di -3 + 7?", + "risposta_attesa": "4", + "categoria": "Matematica di Base" + }, + { + "domanda": "Quanto fa 20 % di 80?", + 
"risposta_attesa": "16", + "categoria": "Matematica di Base" + }, + { + "domanda": "Qual è la metà di 1.5?", + "risposta_attesa": "0.75", + "categoria": "Matematica di Base" + } +] diff --git a/models/data/capital_cities.json b/models/data/capital_cities.json new file mode 100644 index 0000000..275f0e0 --- /dev/null +++ b/models/data/capital_cities.json @@ -0,0 +1,102 @@ +[ + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Italia?", + "risposta_attesa": "Roma", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Spagna?", + "risposta_attesa": "Madrid", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale del Regno Unito?", + "risposta_attesa": "Londra", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Grecia?", + "risposta_attesa": "Atene", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Svezia?", + "risposta_attesa": "Stoccolma", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Norvegia?", + "risposta_attesa": "Oslo", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dei Paesi Bassi?", + "risposta_attesa": "Amsterdam", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Danimarca?", + "risposta_attesa": "Copenaghen", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Polonia?", + "risposta_attesa": "Varsavia", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Belgio?", + "risposta_attesa": "Bruxelles", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Svizzera?", + "risposta_attesa": "Berna", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Austria?", + "risposta_attesa": "Vienna", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Finlandia?", + "risposta_attesa": "Helsinki", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Repubblica Ceca?", + "risposta_attesa": "Praga", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale del Portogallo?", + "risposta_attesa": "Lisboa", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Romania?", + "risposta_attesa": "Bucarest", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale dell'Ungheria?", + "risposta_attesa": "Budapest", + "categoria": "Capitali" + }, + { + "domanda": "Qual è la capitale della Bulgaria?", + "risposta_attesa": "Sofia", + "categoria": "Capitali" + } +] diff --git a/models/data/question_sets.csv b/models/data/question_sets.csv new file mode 100644 index 0000000..e9418eb --- /dev/null +++ b/models/data/question_sets.csv @@ -0,0 +1,3 @@ +id,name,questions +75de3c47-f0b9-4958-a3ff-eb5754ffa1c6,Matematica di Base,"[""c64efc53-5a3e-46ea-8293-7ade24b886bb"", ""deafc6ae-decd-4fd0-8d6d-438b90d36e58"", ""372bf717-e147-401e-a477-3705388f73bd"", ""2e43b39f-4e38-4c8e-bf47-16d1797c53cc"", ""5045a59c-6e92-4069-9ae4-14b63d745d0d"", ""4ee6f745-e500-4cfc-affd-103edc8a8c87"", ""1e8974db-17a1-42fd-befa-382b65d0e742"", ""15d2eb56-6bb6-4975-9a91-cb657c4471eb"", ""c7186103-8dfd-4d25-8873-405b85078a1b"", ""f86e3a38-0a40-4116-ba15-d1708d84e1dd"", ""4fe182fb-5894-46e8-b0a5-f7197af72ee2"", ""05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6"", ""b69ebaa6-1413-46bb-91cd-5585da5a56b4"", 
""28ff22b3-2565-4700-b0e9-027dbb4aa862"", ""97a38e3b-b87e-48f9-9360-cfbea3bd822f"", ""40563245-06cc-4956-bddd-248f6418c00d"", ""c136bf44-742d-4493-b26c-63b72ab2279d"", ""42c1bc09-0949-4cdc-b8c2-408f8109d544"", ""2b5d1726-d399-4100-b6a6-abeb46f598ae"", ""481e3cbb-0b18-4139-a2e3-7a723b014414""]" +2bf4c0e3-d6e5-47b3-abb4-f461a07828e3,Capitali,"[""d1570b42-b01c-48c6-9c19-4ed6cf417c90"", ""2554f189-4c9f-4ebe-b475-6be2f4cf6f56"", ""a2608578-e0be-42bf-8060-bec2ed7fa8ec"", ""fd292041-d79d-400b-b333-96c579b80ac5"", ""923fe770-6b7f-45fb-8adb-a891ee87bd70"", ""f758b801-aa8a-4778-ae91-676e67d05148"", ""1f9ab468-f92b-4107-b37d-29c2efd1331b"", ""12714d1c-97a1-41fc-9782-db56b8b57c8e"", ""ff047143-7bc0-4444-bb3e-31c6f5000a8c"", ""4f98a718-03b3-41e6-81f6-90296532138e"", ""4024de79-d256-4b26-be15-8afd769254aa"", ""52b8f680-bb7e-4fc1-8151-191552850771"", ""40894524-2160-4aae-b169-a6966598580b"", ""07cafa28-0259-47d3-9285-9abb455f821e"", ""8a96aa6d-e976-41ac-9b09-2461e9af8af7"", ""371af137-9edb-49ce-977c-06b03dcae599"", ""52ce73ef-67b8-4bcf-8ae6-458a96657bd4"", ""1019288e-6c37-42b8-8a26-4167a1836aa9"", ""742219a0-76cf-4c70-a14f-fa11e0dd6c40"", ""62f15176-ca1d-4cae-9c12-13e943758621""]" diff --git a/models/data/questions.csv b/models/data/questions.csv new file mode 100644 index 0000000..0e086ae --- /dev/null +++ b/models/data/questions.csv @@ -0,0 +1,60 @@ +id,domanda,risposta_attesa,categoria +f051d43f-a099-4dc2-b12c-d400e7903da7,Qual è il quadrato di 6?,36,Matematica di Base +520b3a45-977c-436d-a393-1f7eda68111f,Quanto fa 100 ÷ 4?,25,Matematica di Base +eb44677c-6c3a-4bcc-a43d-16edae7ff1bf,Qual è la radice quadrata di 49?,7,Matematica di Base +a724c198-931c-4abf-814c-100356191f93,Quanto fa 15 - 8?,7,Matematica di Base +e253550c-6e04-423d-b276-1381d79c5af8,Qual è il risultato di 5²?,25,Matematica di Base +0fffd5d0-4d48-4b51-be4e-d9e8e8ad2ca5,Quanto fa 3 x (2 + 4)?,18,Matematica di Base +e16786cb-2261-493a-8712-b33ac4b5c142,Quanto fa 10 % di 200?,20,Matematica di Base +514ce7bf-823d-4baf-b9c8-4a00600ea908,Qual è il minimo comune multiplo di 4 e 6?,12,Matematica di Base +865097f0-a89b-4f9c-bdee-c0429ec27b2c,Quanto fa (8 + 2) x 5?,50,Matematica di Base +9f90d803-38f6-4f15-9c51-2d1848602e08,Quanto fa 2³?,8,Matematica di Base +cf805fa2-8fe2-4b58-b213-399136c3c030,Qual è il massimo comune divisore di 18 e 24?,6,Matematica di Base +11a3d746-e565-4458-a4cf-eb2f31f00d09,Quanto fa 0 x 154?,0,Matematica di Base +2e903e0b-bc0f-4a09-805b-1842f1f93cf7,Qual è il reciproco di 2?,1/2,Matematica di Base +23f3e6da-71bd-4ae3-8084-3d3a2d3875e0,Quanto fa 1/4 + 1/2?,3/4,Matematica di Base +de9450e6-6876-4dfa-9fbd-1bbc5a7b26aa,Quanto fa 0.1 + 0.2?,0.3,Matematica di Base +40064003-197c-4163-9a16-86cddbea341c,Qual è il risultato di -3 + 7?,4,Matematica di Base +320626f1-054c-4da8-a4a3-0a8a7662aa3f,Quanto fa 20 % di 80?,16,Matematica di Base +87d176d4-5190-4126-9aaf-9f291cb69007,Qual è la metà di 1.5?,0.75,Matematica di Base +c64efc53-5a3e-46ea-8293-7ade24b886bb,Quanto fa 7 + 5?,12,Matematica di Base +deafc6ae-decd-4fd0-8d6d-438b90d36e58,Quanto fa 9 × 3?,27,Matematica di Base +372bf717-e147-401e-a477-3705388f73bd,Qual è il quadrato di 6?,36,Matematica di Base +2e43b39f-4e38-4c8e-bf47-16d1797c53cc,Quanto fa 100 ÷ 4?,25,Matematica di Base +5045a59c-6e92-4069-9ae4-14b63d745d0d,Qual è la radice quadrata di 49?,7,Matematica di Base +4ee6f745-e500-4cfc-affd-103edc8a8c87,Quanto fa 15 - 8?,7,Matematica di Base +1e8974db-17a1-42fd-befa-382b65d0e742,Qual è il risultato di 5²?,25,Matematica di Base +15d2eb56-6bb6-4975-9a91-cb657c4471eb,Quanto fa 
3 × (2 + 4)?,18,Matematica di Base +c7186103-8dfd-4d25-8873-405b85078a1b,Quanto fa 10 % di 200?,20,Matematica di Base +f86e3a38-0a40-4116-ba15-d1708d84e1dd,Qual è il minimo comune multiplo di 4 e 6?,12,Matematica di Base +4fe182fb-5894-46e8-b0a5-f7197af72ee2,Quanto fa (8 + 2) × 5?,50,Matematica di Base +05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6,Quanto fa 2³?,8,Matematica di Base +b69ebaa6-1413-46bb-91cd-5585da5a56b4,Qual è il massimo comune divisore di 18 e 24?,6,Matematica di Base +28ff22b3-2565-4700-b0e9-027dbb4aa862,Quanto fa 0 × 154?,0,Matematica di Base +97a38e3b-b87e-48f9-9360-cfbea3bd822f,Qual è il reciproco di 2?,1/2,Matematica di Base +40563245-06cc-4956-bddd-248f6418c00d,Quanto fa 1/4 + 1/2?,3/4,Matematica di Base +c136bf44-742d-4493-b26c-63b72ab2279d,Quanto fa 0.1 + 0.2?,0.3,Matematica di Base +42c1bc09-0949-4cdc-b8c2-408f8109d544,Qual è il risultato di -3 + 7?,4,Matematica di Base +2b5d1726-d399-4100-b6a6-abeb46f598ae,Quanto fa 20 % di 80?,16,Matematica di Base +481e3cbb-0b18-4139-a2e3-7a723b014414,Qual è la metà di 1.5?,0.75,Matematica di Base +d1570b42-b01c-48c6-9c19-4ed6cf417c90,Qual è la capitale della Francia?,Parigi,Capitali +2554f189-4c9f-4ebe-b475-6be2f4cf6f56,Qual è la capitale della Germania?,Berlino,Capitali +a2608578-e0be-42bf-8060-bec2ed7fa8ec,Qual è la capitale dell'Italia?,Roma,Capitali +fd292041-d79d-400b-b333-96c579b80ac5,Qual è la capitale della Spagna?,Madrid,Capitali +923fe770-6b7f-45fb-8adb-a891ee87bd70,Qual è la capitale del Regno Unito?,Londra,Capitali +f758b801-aa8a-4778-ae91-676e67d05148,Qual è la capitale della Grecia?,Atene,Capitali +1f9ab468-f92b-4107-b37d-29c2efd1331b,Qual è la capitale della Svezia?,Stoccolma,Capitali +12714d1c-97a1-41fc-9782-db56b8b57c8e,Qual è la capitale della Norvegia?,Oslo,Capitali +ff047143-7bc0-4444-bb3e-31c6f5000a8c,Qual è la capitale dei Paesi Bassi?,Amsterdam,Capitali +4f98a718-03b3-41e6-81f6-90296532138e,Qual è la capitale della Danimarca?,Copenaghen,Capitali +4024de79-d256-4b26-be15-8afd769254aa,Qual è la capitale della Polonia?,Varsavia,Capitali +52b8f680-bb7e-4fc1-8151-191552850771,Qual è la capitale della Belgio?,Bruxelles,Capitali +40894524-2160-4aae-b169-a6966598580b,Qual è la capitale della Svizzera?,Berna,Capitali +07cafa28-0259-47d3-9285-9abb455f821e,Qual è la capitale dell'Austria?,Vienna,Capitali +8a96aa6d-e976-41ac-9b09-2461e9af8af7,Qual è la capitale della Finlandia?,Helsinki,Capitali +371af137-9edb-49ce-977c-06b03dcae599,Qual è la capitale della Repubblica Ceca?,Praga,Capitali +52ce73ef-67b8-4bcf-8ae6-458a96657bd4,Qual è la capitale del Portogallo?,Lisboa,Capitali +1019288e-6c37-42b8-8a26-4167a1836aa9,Qual è la capitale della Romania?,Bucarest,Capitali +742219a0-76cf-4c70-a14f-fa11e0dd6c40,Qual è la capitale dell'Ungheria?,Budapest,Capitali +62f15176-ca1d-4cae-9c12-13e943758621,Qual è la capitale della Bulgaria?,Sofia,Capitali +d6a8a902-920e-41bc-9c2a-ecf7d6980f6a,Di che colore è il cavallo bianco di Napoleone?,Bianco, diff --git a/models/data/test_results.csv b/models/data/test_results.csv new file mode 100644 index 0000000..f9de9a3 --- /dev/null +++ b/models/data/test_results.csv @@ -0,0 +1,2 @@ +id,set_id,timestamp,results +b0ce010f-6f1a-405f-98cc-b46f075c7a6a,75de3c47-f0b9-4958-a3ff-eb5754ffa1c6,2025-06-24 13:22:16,"{""set_name"": ""Matematica di Base"", ""timestamp"": ""2025-06-24 13:22:16"", ""avg_score"": 91.25, ""sample_type"": ""Generata da LLM"", ""method"": ""LLM"", ""generation_preset"": ""t1"", ""evaluation_preset"": ""t1"", ""questions"": {""c64efc53-5a3e-46ea-8293-7ade24b886bb"": {""question"": 
""Quanto fa 7 + 5?"", ""expected_answer"": ""12"", ""actual_answer"": ""7 + 5 fa 12."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, corretta e completa. Contiene la stessa informazione chiave (il risultato della somma) e utilizza una struttura simile, anche se leggermente pi\u00f9 elaborata."", ""similarity"": 90, ""correctness"": 100, ""completeness"": 95, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 7 + 5?\n Risposta Attesa: 12\n Risposta Effettiva: 7 + 5 fa 12.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, corretta e completa. Contiene la stessa informazione chiave (il risultato della somma) e utilizza una struttura simile, anche se leggermente pi\u00f9 elaborata.\"",\n \""similarity\"": 90,\n \""correctness\"": 100,\n \""completeness\"": 95\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 7 + 5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7 + 5 fa 12.""}}, ""deafc6ae-decd-4fd0-8d6d-438b90d36e58"": {""question"": ""Quanto fa 9 \u00d7 3?"", ""expected_answer"": ""27"", ""actual_answer"": ""27"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 9 \u00d7 3?\n Risposta Attesa: 27\n Risposta Effettiva: 27\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 9 \u00d7 3?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""27""}}, ""372bf717-e147-401e-a477-3705388f73bd"": {""question"": ""Qual \u00e8 il quadrato di 6?"", ""expected_answer"": ""36"", ""actual_answer"": ""Il quadrato di 6 \u00e8 36."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 semanticamente simile, corretta e completa rispetto alla risposta attesa. Contiene tutte le informazioni chiave e la correttezza \u00e8 garantita."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il quadrato di 6?\n Risposta Attesa: 36\n Risposta Effettiva: Il quadrato di 6 \u00e8 36.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 semanticamente simile, corretta e completa rispetto alla risposta attesa. 
Contiene tutte le informazioni chiave e la correttezza \u00e8 garantita.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il quadrato di 6?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il quadrato di 6 \u00e8 36.""}}, ""2e43b39f-4e38-4c8e-bf47-16d1797c53cc"": {""question"": ""Quanto fa 100 \u00f7 4?"", ""expected_answer"": ""25"", ""actual_answer"": ""100 \u00f7 4 fa **25**."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 perfettamente simile, corretta e completa rispetto alla risposta attesa. Il risultato calcolato \u00e8 25, che corrisponde esattamente all'attesa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 100 \u00f7 4?\n Risposta Attesa: 25\n Risposta Effettiva: 100 \u00f7 4 fa **25**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 perfettamente simile, corretta e completa rispetto alla risposta attesa. Il risultato calcolato \u00e8 25, che corrisponde esattamente all'attesa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 100 \u00f7 4?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""100 \u00f7 4 fa **25**.""}}, ""5045a59c-6e92-4069-9ae4-14b63d745d0d"": {""question"": ""Qual \u00e8 la radice quadrata di 49?"", ""expected_answer"": ""7"", ""actual_answer"": ""7"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa, pertanto il punteggio complessivo \u00e8 al massimo."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 la radice quadrata di 49?\n Risposta Attesa: 7\n Risposta Effettiva: 7\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa, pertanto il punteggio complessivo \u00e8 al massimo.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 la radice quadrata di 49?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7""}}, ""4ee6f745-e500-4cfc-affd-103edc8a8c87"": {""question"": ""Quanto fa 15 - 8?"", ""expected_answer"": ""7"", ""actual_answer"": ""7"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 15 - 8?\n Risposta Attesa: 7\n Risposta Effettiva: 7\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 15 - 8?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""7""}}, ""1e8974db-17a1-42fd-befa-382b65d0e742"": {""question"": ""Qual \u00e8 il risultato di 5\u00b2?"", ""expected_answer"": ""25"", ""actual_answer"": ""25"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il risultato di 5\u00b2?\n Risposta Attesa: 25\n Risposta Effettiva: 25\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il risultato di 5\u00b2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""25""}}, ""15d2eb56-6bb6-4975-9a91-cb657c4471eb"": {""question"": ""Quanto fa 3 \u00d7 (2 + 4)?"", ""expected_answer"": ""18"", ""actual_answer"": ""12"", ""evaluation"": {""score"": 50, ""explanation"": ""La risposta effettiva \u00e8 semanticamente simile ma errata rispetto alla risposta attesa. Non \u00e8 completa perch\u00e9 manca la correttezza."", ""similarity"": 50, ""correctness"": 0, ""completeness"": 50, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 3 \u00d7 (2 + 4)?\n Risposta Attesa: 18\n Risposta Effettiva: 12\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 50,\n \""explanation\"": \""La risposta effettiva \u00e8 semanticamente simile ma errata rispetto alla risposta attesa. 
Non \u00e8 completa perch\u00e9 manca la correttezza.\"",\n \""similarity\"": 50,\n \""correctness\"": 0,\n \""completeness\"": 50\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 3 \u00d7 (2 + 4)?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""12""}}, ""c7186103-8dfd-4d25-8873-405b85078a1b"": {""question"": ""Quanto fa 10 % di 200?"", ""expected_answer"": ""20"", ""actual_answer"": ""10 %."", ""evaluation"": {""score"": 50, ""explanation"": ""La risposta effettiva \u00e8 simile alla risposta attesa nel contesto (stima percentuale), ma \u00e8 mancante del risultato numerico corretto e non \u00e8 completa. La correttezza \u00e8 bassa perch\u00e9 il valore fornito \u00e8 errato."", ""similarity"": 60, ""correctness"": 20, ""completeness"": 30, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 10 % di 200?\n Risposta Attesa: 20\n Risposta Effettiva: 10 %.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 50,\n \""explanation\"": \""La risposta effettiva \u00e8 simile alla risposta attesa nel contesto (stima percentuale), ma \u00e8 mancante del risultato numerico corretto e non \u00e8 completa. La correttezza \u00e8 bassa perch\u00e9 il valore fornito \u00e8 errato.\"",\n \""similarity\"": 60,\n \""correctness\"": 20,\n \""completeness\"": 30\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 10 % di 200?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""10 %.""}}, ""f86e3a38-0a40-4116-ba15-d1708d84e1dd"": {""question"": ""Qual \u00e8 il minimo comune multiplo di 4 e 6?"", ""expected_answer"": ""12"", ""actual_answer"": ""Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel formato. 
\u00c8 semanticamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il minimo comune multiplo di 4 e 6?\n Risposta Attesa: 12\n Risposta Effettiva: Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel formato. \u00c8 semanticamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il minimo comune multiplo di 4 e 6?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il minimo comune multiplo (MCM) di 4 e 6 \u00e8 **12**.""}}, ""4fe182fb-5894-46e8-b0a5-f7197af72ee2"": {""question"": ""Quanto fa (8 + 2) \u00d7 5?"", ""expected_answer"": ""50"", ""actual_answer"": ""(8 + 2) \u00d7 5 = 10 \u00d7 5 = 50"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa, coprendo tutti i punti chiave."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa (8 + 2) \u00d7 5?\n Risposta Attesa: 50\n Risposta Effettiva: (8 + 2) \u00d7 5 = 10 \u00d7 5 = 50\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa, coprendo tutti i punti chiave.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa (8 + 2) \u00d7 5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""(8 + 2) \u00d7 5 = 10 \u00d7 5 = 50""}}, ""05ba39ee-ba12-41d1-8d7c-4ab5b7b216c6"": {""question"": ""Quanto fa 2\u00b3?"", ""expected_answer"": ""8"", ""actual_answer"": ""2\u00b3 fa **8**."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, correttamente formattata e contenente la stessa informazione chiave. \u00c8 corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 2\u00b3?\n Risposta Attesa: 8\n Risposta Effettiva: 2\u00b3 fa **8**.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, correttamente formattata e contenente la stessa informazione chiave. 
\u00c8 corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 2\u00b3?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""2\u00b3 fa **8**.""}}, ""b69ebaa6-1413-46bb-91cd-5585da5a56b4"": {""question"": ""Qual \u00e8 il massimo comune divisore di 18 e 24?"", ""expected_answer"": ""6"", ""actual_answer"": ""Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6."", ""evaluation"": {""score"": 95, ""explanation"": ""La risposta effettiva \u00e8 molto simile alla risposta attesa, corrette e completa. Contiene l'informazione chiave (MCD \u00e8 6) in modo chiaro."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il massimo comune divisore di 18 e 24?\n Risposta Attesa: 6\n Risposta Effettiva: Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 95,\n \""explanation\"": \""La risposta effettiva \u00e8 molto simile alla risposta attesa, corrette e completa. Contiene l'informazione chiave (MCD \u00e8 6) in modo chiaro.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il massimo comune divisore di 18 e 24?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il massimo comune divisore (MCD) di 18 e 24 \u00e8 6.""}}, ""28ff22b3-2565-4700-b0e9-027dbb4aa862"": {""question"": ""Quanto fa 0 \u00d7 154?"", ""expected_answer"": ""0"", ""actual_answer"": ""0 \u00d7 154 fa 0."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 perfettamente correta e semanticamente identica alla risposta attesa. 
Contiene tutti i punti chiave richiesti."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 0 \u00d7 154?\n Risposta Attesa: 0\n Risposta Effettiva: 0 \u00d7 154 fa 0.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 perfettamente correta e semanticamente identica alla risposta attesa. Contiene tutti i punti chiave richiesti.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 0 \u00d7 154?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0 \u00d7 154 fa 0.""}}, ""97a38e3b-b87e-48f9-9360-cfbea3bd822f"": {""question"": ""Qual \u00e8 il reciproco di 2?"", ""expected_answer"": ""1/2"", ""actual_answer"": ""Il reciproco di 2 \u00e8 1/2."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel significato, quindi ottiene il massimo punteggio in tutte le metriche."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il reciproco di 2?\n Risposta Attesa: 1/2\n Risposta Effettiva: Il reciproco di 2 \u00e8 1/2.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. 
Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa nel contenuto e nel significato, quindi ottiene il massimo punteggio in tutte le metriche.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il reciproco di 2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""Il reciproco di 2 \u00e8 1/2.""}}, ""40563245-06cc-4956-bddd-248f6418c00d"": {""question"": ""Quanto fa 1/4 + 1/2?"", ""expected_answer"": ""3/4"", ""actual_answer"": ""1/4 + 1/2 = 3/4."", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa. \u00c8 semanticamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 1/4 + 1/2?\n Risposta Attesa: 3/4\n Risposta Effettiva: 1/4 + 1/2 = 3/4.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa. 
\u00c8 semanticamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 1/4 + 1/2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""1/4 + 1/2 = 3/4.""}}, ""c136bf44-742d-4493-b26c-63b72ab2279d"": {""question"": ""Quanto fa 0.1 + 0.2?"", ""expected_answer"": ""0.3"", ""actual_answer"": ""0.3"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 0.1 + 0.2?\n Risposta Attesa: 0.3\n Risposta Effettiva: 0.3\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 0.1 + 0.2?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0.3""}}, ""42c1bc09-0949-4cdc-b8c2-408f8109d544"": {""question"": ""Qual \u00e8 il risultato di -3 + 7?"", ""expected_answer"": ""4"", ""actual_answer"": ""4"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 il risultato di -3 + 7?\n Risposta Attesa: 4\n Risposta Effettiva: 4\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. 
Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi \u00e8 perfettamente simile, corretta e completa.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 il risultato di -3 + 7?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""4""}}, ""2b5d1726-d399-4100-b6a6-abeb46f598ae"": {""question"": ""Quanto fa 20 % di 80?"", ""expected_answer"": ""16"", ""actual_answer"": ""20 %."", ""evaluation"": {""score"": 40, ""explanation"": ""La risposta effettiva \u00e8 incompleta e non fornisce il risultato numerico richiesto. Tuttavia, contiene l'indicazione del calcolo ('20 %'), che \u00e8 semanticamente simile alla risposta attesa. La correttezza \u00e8 bassa perch\u00e9 manca il risultato, e la completezza \u00e8 nulla poich\u00e9 non contiene il valore corretto."", ""similarity"": 50, ""correctness"": 20, ""completeness"": 0, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Quanto fa 20 % di 80?\n Risposta Attesa: 16\n Risposta Effettiva: 20 %.\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 40,\n \""explanation\"": \""La risposta effettiva \u00e8 incompleta e non fornisce il risultato numerico richiesto. Tuttavia, contiene l'indicazione del calcolo ('20 %'), che \u00e8 semanticamente simile alla risposta attesa. 
La correttezza \u00e8 bassa perch\u00e9 manca il risultato, e la completezza \u00e8 nulla poich\u00e9 non contiene il valore corretto.\"",\n \""similarity\"": 50,\n \""correctness\"": 20,\n \""completeness\"": 0\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Quanto fa 20 % di 80?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""20 %.""}}, ""481e3cbb-0b18-4139-a2e3-7a723b014414"": {""question"": ""Qual \u00e8 la met\u00e0 di 1.5?"", ""expected_answer"": ""0.75"", ""actual_answer"": ""0.75"", ""evaluation"": {""score"": 100, ""explanation"": ""La risposta effettiva \u00e8 identica alla risposta attesa, quindi ottiene il punteggio massimo in tutte le metriche."", ""similarity"": 100, ""correctness"": 100, ""completeness"": 100, ""api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""\n Sei un valutatore esperto che valuta la qualit\u00e0 delle risposte alle domande.\n Domanda: Qual \u00e8 la met\u00e0 di 1.5?\n Risposta Attesa: 0.75\n Risposta Effettiva: 0.75\n\n Valuta la risposta effettiva rispetto alla risposta attesa in base a:\n 1. Somiglianza (0-100): Quanto \u00e8 semanticamente simile la risposta effettiva a quella attesa?\n 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette?\n 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa?\n Calcola un punteggio complessivo (0-100) basato su queste metriche.\n Fornisci una breve spiegazione della tua valutazione (max 100 parole).\n Formatta la tua risposta come un oggetto JSON con questi campi:\n - score: il punteggio complessivo (numero)\n - explanation: la tua spiegazione (stringa)\n - similarity: punteggio di somiglianza (numero)\n - correctness: punteggio di correttezza (numero)\n - completeness: punteggio di completezza (numero)\n ""}], ""temperature"": 0.8, ""max_tokens"": 1000, ""response_format"": {""type"": ""json_object""}}, ""response_content"": ""\n{\n \""score\"": 100,\n \""explanation\"": \""La risposta effettiva \u00e8 identica alla risposta attesa, quindi ottiene il punteggio massimo in tutte le metriche.\"",\n \""similarity\"": 100,\n \""correctness\"": 100,\n \""completeness\"": 100\n}\n""}}, ""generation_api_details"": {""request"": {""model"": ""glm-4-flash-250414"", ""messages"": [{""role"": ""user"", ""content"": ""Rispondi alla seguente domanda in modo conciso e accurato: Qual \u00e8 la met\u00e0 di 1.5?""}], ""temperature"": 0.8, ""max_tokens"": 1000}, ""response_content"": ""0.75""}}}}" diff --git a/models/db_utils.py b/models/db_utils.py new file mode 100644 index 0000000..bcefa53 --- /dev/null +++ b/models/db_utils.py @@ -0,0 +1,96 @@ +import configparser +from pathlib import Path +from sqlalchemy import create_engine, text + + +def _ensure_database(cfg): + """Create the target database if it does not exist.""" + root_url = ( + f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}" + ) + engine = create_engine(root_url, isolation_level="AUTOCOMMIT") + with engine.begin() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) + +_engine = None + + +def get_engine(): + """Restituisce un'istanza di motore SQLAlchemy.""" + global _engine + if _engine is None: + config = configparser.ConfigParser() + root = 
Path(__file__).resolve().parent.parent + cfg_path = root / 'db.config' + if not cfg_path.exists(): + cfg_path = root / 'db.config.example' + config.read(cfg_path) + cfg = config['mysql'] + _ensure_database(cfg) + url = ( + f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}/{cfg['database']}" + ) + _engine = create_engine( + url, + pool_pre_ping=True, # Verifica che le connessioni siano attive + pool_recycle=3600, # Ricicla le connessioni inattive per evitare timeout + ) + return _engine + + +def init_db(): + """Crea le tabelle necessarie se non esistono.""" + engine = get_engine() + with engine.begin() as conn: + conn.execute( + text( + """CREATE TABLE IF NOT EXISTS questions ( + id VARCHAR(36) PRIMARY KEY, + domanda TEXT, + risposta_attesa TEXT, + categoria TEXT + )""" + ) + ) + conn.execute( + text( + """CREATE TABLE IF NOT EXISTS question_sets ( + id VARCHAR(36) PRIMARY KEY, + name TEXT + )""" + ) + ) + conn.execute( + text( + """CREATE TABLE IF NOT EXISTS question_set_questions ( + set_id VARCHAR(36), + question_id VARCHAR(36), + PRIMARY KEY (set_id, question_id) + )""" + ) + ) + conn.execute( + text( + """CREATE TABLE IF NOT EXISTS test_results ( + id VARCHAR(36) PRIMARY KEY, + set_id VARCHAR(36), + timestamp TEXT, + results JSON + )""" + ) + ) + conn.execute( + text( + """CREATE TABLE IF NOT EXISTS api_presets ( + id VARCHAR(36) PRIMARY KEY, + name TEXT, + provider_name TEXT, + endpoint TEXT, + api_key TEXT, + model TEXT, + temperature FLOAT, + max_tokens INT + )""" + ) + ) + diff --git a/models/openai_service.py b/models/openai_service.py new file mode 100644 index 0000000..b378645 --- /dev/null +++ b/models/openai_service.py @@ -0,0 +1,285 @@ +import os +import json +import logging +from openai import OpenAI, APIConnectionError, RateLimitError, APIStatusError + +DEFAULT_MODEL = "gpt-4o" +DEFAULT_ENDPOINT = "https://api.openai.com/v1" + +# Modelli disponibili per diversi provider (esempio) +OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] +ANTHROPIC_MODELS = ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"] +# Aggiungi altri provider e modelli se necessario +# XAI_MODELS = ["grok-1"] + +def get_openai_client(api_key: str, base_url: str = None): + """ + Crea e restituisce un client OpenAI configurato. + Args: + api_key: La chiave API. + base_url: L'URL base dell'endpoint API (opzionale, default a OpenAI). + Returns: + Un'istanza del client OpenAI o None se la chiave API non è fornita. + """ + if not api_key: + # Commentato per ridurre output UI + logging.warning("Tentativo di creare client OpenAI senza chiave API.") + return None + try: + # Se base_url è None, "custom", o vuoto, usa il default di OpenAI. + # Altrimenti, usa il base_url fornito. + effective_base_url = base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT + return OpenAI(api_key=api_key, base_url=effective_base_url) + except Exception as e: + logging.error(f"Errore durante la creazione del client OpenAI: {e}") + return None + +def evaluate_answer(question: str, expected_answer: str, actual_answer: str, + client_config: dict, show_api_details: bool = False): + """ + Valuta una risposta utilizzando un LLM specificato tramite client_config. + Args: + question: La domanda. + expected_answer: La risposta attesa. + actual_answer: La risposta effettiva da valutare. + client_config: Dizionario contenente {api_key, endpoint, model, temperature, max_tokens}. 
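+                       Esempio indicativo di struttura: {"api_key": "<chiave>", "endpoint": "https://api.openai.com/v1", "model": "gpt-4o", "temperature": 0.0, "max_tokens": 250}.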
+ show_api_details: Se True, include i dettagli della richiesta/risposta API. + Returns: + Un dizionario con il punteggio e la spiegazione, o un risultato di errore. + """ + client = get_openai_client(api_key=client_config.get("api_key"), base_url=client_config.get("endpoint")) + if not client: + return {"score": 0, "explanation": "Errore: Client API per la valutazione non configurato.", "similarity": 0, "correctness": 0, "completeness": 0} + + prompt = f""" + Sei un valutatore esperto che valuta la qualità delle risposte alle domande. + Domanda: {question} + Risposta Attesa: {expected_answer} + Risposta Effettiva: {actual_answer} + + Valuta la risposta effettiva rispetto alla risposta attesa in base a: + 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? + 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? + 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? + Calcola un punteggio complessivo (0-100) basato su queste metriche. + Fornisci una breve spiegazione della tua valutazione (max 100 parole). + Formatta la tua risposta come un oggetto JSON con questi campi: + - score: il punteggio complessivo (numero) + - explanation: la tua spiegazione (stringa) + - similarity: punteggio di somiglianza (numero) + - correctness: punteggio di correttezza (numero) + - completeness: punteggio di completezza (numero) + Esempio di risposta JSON: + {{ + "score": 95, + "explanation": "La risposta è corretta e completa", + "similarity": 90, + "correctness": 100, + "completeness": 95 + }} + """ + + api_request_details = { + "model": client_config.get("model", DEFAULT_MODEL), + "messages": [{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.0), + "max_tokens": client_config.get("max_tokens", 250), # Aumentato leggermente per JSON più complesso + "response_format": {"type": "json_object"} + } + + api_details_for_log = {} + if show_api_details: + # Copia i dettagli della richiesta per loggarli, escludendo dati sensibili se necessario + # (in questo caso, la chiave API è gestita dal client e non è direttamente nei dettagli della richiesta) + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + content = response.choices[0].message.content or "{}" + if show_api_details: + api_details_for_log["response_content"] = content + + try: + evaluation = json.loads(content) + required_keys = ['score', 'explanation', 'similarity', 'correctness', 'completeness'] + if not all(key in evaluation for key in required_keys): + logging.warning( + f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." + ) + for key in required_keys: + if key not in evaluation: + evaluation[key] = 0 if key != 'explanation' else "Valutazione incompleta o formato JSON non corretto." 
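+            # Nota: le chiavi mancanti vengono riempite con valori di default (0, oppure un messaggio per
+            # 'explanation'), così il chiamante trova sempre score/similarity/correctness/completeness/explanation.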
+ + evaluation['api_details'] = api_details_for_log + return evaluation + except json.JSONDecodeError: + logging.error( + f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" + ) + return { + "score": 0, "explanation": f"Errore di decodifica JSON: {content[:100]}...", + "similarity": 0, "correctness": 0, "completeness": 0, + "api_details": api_details_for_log + } + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error( + f"Errore API durante la valutazione: {type(e).__name__} - {e}" + ) + api_details_for_log["error"] = str(e) + return { + "score": 0, "explanation": f"Errore API: {type(e).__name__}", + "similarity": 0, "correctness": 0, "completeness": 0, + "api_details": api_details_for_log + } + except Exception as e: + logging.error( + f"Errore imprevisto durante la valutazione: {type(e).__name__} - {e}" + ) + api_details_for_log["error"] = str(e) + return { + "score": 0, "explanation": f"Errore imprevisto: {type(e).__name__}", + "similarity": 0, "correctness": 0, "completeness": 0, + "api_details": api_details_for_log + } + +def generate_example_answer_with_llm(question: str, client_config: dict, show_api_details: bool = False): + """ + Genera una risposta di esempio per una domanda utilizzando un LLM. + Args: + question: La domanda per cui generare una risposta. + client_config: Dizionario contenente {api_key, endpoint, model, temperature, max_tokens}. + show_api_details: Se True, include i dettagli della chiamata API nel risultato. + Returns: + Un dizionario con { "answer": "risposta generata" | None, "api_details": {...} | None }. + """ + client = get_openai_client(api_key=client_config.get("api_key"), base_url=client_config.get("endpoint")) + if not client: + logging.error("Client API per la generazione risposte non configurato.") + return { + "answer": None, + "api_details": {"error": "Client API non configurato"} + if show_api_details + else None, + } + + # Controllo se la domanda è None o una stringa vuota + if question is None or not isinstance(question, str) or question.strip() == "": + logging.error("La domanda fornita è vuota o non valida.") + return { + "answer": None, + "api_details": {"error": "Domanda vuota o non valida"} + if show_api_details + else None, + } + + prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" + + api_request_details = { + "model": client_config.get("model", DEFAULT_MODEL), + "messages": [{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.7), + "max_tokens": client_config.get("max_tokens", 500) + } + + api_details_for_log = {} + if show_api_details: + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + answer = response.choices[0].message.content.strip() if response.choices and response.choices[0].message.content else None + if show_api_details: + api_details_for_log["response_content"] = response.choices[0].message.content if response.choices else "Nessun contenuto" + return {"answer": answer, "api_details": api_details_for_log if show_api_details else None} + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error( + f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" + ) + if show_api_details: + api_details_for_log["error"] = str(e) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else None, + } + except Exception as e: + 
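+        # Fallback per errori non previsti (diversi dagli errori API gestiti sopra): si registra
+        # l'eccezione e si restituisce comunque answer=None con gli eventuali dettagli della chiamata.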
logging.error( + f"Errore imprevisto durante la generazione della risposta: {type(e).__name__} - {e}" + ) + if show_api_details: + api_details_for_log["error"] = str(e) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else None, + } + +def test_api_connection(api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int): + """ + Testa la connessione all'API LLM con i parametri forniti. + """ + client = get_openai_client(api_key=api_key, base_url=endpoint) + if not client: + return False, "Client API non inizializzato. Controlla chiave API e endpoint." + + try: + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'"}], + temperature=temperature, + max_tokens=max_tokens # Assicurati che sia sufficiente per la risposta attesa + ) + content = response.choices[0].message.content or "" + if "Connessione riuscita." in content: + return True, "Connessione API riuscita!" + else: + return False, f"Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): {content[:200]}..." + except APIConnectionError as e: + return False, f"Errore di connessione API: {e}" + except RateLimitError as e: + return False, f"Errore di Rate Limit API: {e}" + except APIStatusError as e: + return False, f"Errore di stato API (es. modello '{model}' non valido per l'endpoint '{endpoint}', autenticazione fallita, quota superata): {e.status_code} - {e.message}" + except Exception as e: + return False, f"Errore imprevisto durante il test della connessione: {type(e).__name__} - {e}" + +def get_available_models_for_endpoint(provider_name: str, endpoint_url: str = None, api_key: str = None): + """ + Restituisce una lista di modelli disponibili basata sul provider o tenta di elencarli dall'endpoint. + Args: + provider_name: Nome del provider (es. "OpenAI", "Anthropic", "Personalizzato"). + endpoint_url: URL dell'endpoint (rilevante per "Personalizzato"). + api_key: Chiave API per autenticarsi (necessaria per elencare modelli da endpoint personalizzati). + Returns: + Una lista di stringhe di nomi di modelli. 
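+        Esempio indicativo: get_available_models_for_endpoint("OpenAI") restituisce OPENAI_MODELS,
+        cioè ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"].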
+ """ + if provider_name == "OpenAI": + return OPENAI_MODELS + elif provider_name == "Anthropic": + return ANTHROPIC_MODELS + # Aggiungi altri provider predefiniti qui + # elif provider_name == "XAI": + # return XAI_MODELS + elif provider_name == "Personalizzato": + if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): + # Se non ci sono informazioni sufficienti, restituisce una lista di fallback + return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] + + client = get_openai_client(api_key=api_key, base_url=endpoint_url) + if not client: + return ["(Errore creazione client API)", DEFAULT_MODEL] + try: + models = client.models.list() + # Filtra per modelli che non sono di embedding + filtered_models = sorted([ + model.id for model in models + if not any(term in model.id.lower() for term in ["embed", "embedding"]) + and (any(term in model.id.lower() for term in ["chat", "instruct", "gpt", "claude", "grok"]) or len(model.id.split('-')) > 2) + ]) + if not filtered_models: + # Se il filtro aggressivo non trova nulla, restituisci tutti i modelli non di embedding + filtered_models = sorted([model.id for model in models if not any(term in model.id.lower() for term in ["embed", "embedding"])]) + return filtered_models if filtered_models else [DEFAULT_MODEL] + except Exception as e: + return ["(Errore recupero modelli)", DEFAULT_MODEL] + return [DEFAULT_MODEL] # Default generale se il provider non è riconosciuto diff --git a/models/question.py b/models/question.py new file mode 100644 index 0000000..6a06f8f --- /dev/null +++ b/models/question.py @@ -0,0 +1,65 @@ +from dataclasses import dataclass +from typing import Optional +import uuid +import pandas as pd +from sqlalchemy import text + +from models.db_utils import get_engine + +@dataclass +class Question: + id: str + domanda: str + risposta_attesa: str + categoria: str = "" + + @staticmethod + def load_all() -> pd.DataFrame: + engine = get_engine() + df = pd.read_sql("SELECT * FROM questions", engine) + if 'categoria' not in df.columns: + df['categoria'] = "" + df['id'] = df['id'].astype(str) + df['domanda'] = df['domanda'].astype(str).fillna("") + df['risposta_attesa'] = df['risposta_attesa'].astype(str).fillna("") + df['categoria'] = df['categoria'].astype(str).fillna("") + return df + + @staticmethod + def add(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: + qid = question_id or str(uuid.uuid4()) + engine = get_engine() + with engine.begin() as conn: + conn.execute( + text( + "INSERT INTO questions (id, domanda, risposta_attesa, categoria) VALUES (:id, :domanda, :risposta_attesa, :categoria)" + ), + {"id": qid, "domanda": domanda, "risposta_attesa": risposta_attesa, "categoria": categoria}, + ) + return qid + + @staticmethod + def update(question_id: str, domanda: Optional[str] = None, risposta_attesa: Optional[str] = None, categoria: Optional[str] = None) -> None: + updates = [] + params = {"id": question_id} + if domanda is not None: + updates.append("domanda=:domanda") + params["domanda"] = domanda + if risposta_attesa is not None: + updates.append("risposta_attesa=:risposta_attesa") + params["risposta_attesa"] = risposta_attesa + if categoria is not None: + updates.append("categoria=:categoria") + params["categoria"] = categoria + if not updates: + return + engine = get_engine() + with engine.begin() as conn: + conn.execute(text(f"UPDATE questions SET {', '.join(updates)} WHERE id=:id"), params) + + 
@staticmethod + def delete(question_id: str) -> None: + engine = get_engine() + with engine.begin() as conn: + conn.execute(text("DELETE FROM question_set_questions WHERE question_id=:id"), {"id": question_id}) + conn.execute(text("DELETE FROM questions WHERE id=:id"), {"id": question_id}) diff --git a/models/question_set.py b/models/question_set.py new file mode 100644 index 0000000..0b61f67 --- /dev/null +++ b/models/question_set.py @@ -0,0 +1,56 @@ +from dataclasses import dataclass, field +from typing import List, Optional +import uuid +import pandas as pd +from sqlalchemy import text + +from models.db_utils import get_engine + +@dataclass +class QuestionSet: + id: str + name: str + questions: List[str] = field(default_factory=list) + + @staticmethod + def load_all() -> pd.DataFrame: + engine = get_engine() + sets_df = pd.read_sql("SELECT id, name FROM question_sets", engine) + rel_df = pd.read_sql("SELECT set_id, question_id FROM question_set_questions", engine) + sets_df['questions'] = sets_df['id'].apply(lambda sid: rel_df[rel_df['set_id']==sid]['question_id'].tolist()) + sets_df['id'] = sets_df['id'].astype(str) + sets_df['name'] = sets_df['name'].astype(str).fillna("") + return sets_df + + @staticmethod + def create(name: str, question_ids: Optional[List[str]] = None) -> str: + set_id = str(uuid.uuid4()) + q_ids = [str(q) for q in (question_ids or [])] + engine = get_engine() + with engine.begin() as conn: + conn.execute(text("INSERT INTO question_sets (id, name) VALUES (:id, :name)"), {"id": set_id, "name": name}) + for qid in q_ids: + conn.execute(text("INSERT INTO question_set_questions (set_id, question_id) VALUES (:sid, :qid)"), {"sid": set_id, "qid": qid}) + return set_id + + @staticmethod + def update(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: + engine = get_engine() + with engine.begin() as conn: + if name is not None: + conn.execute(text("UPDATE question_sets SET name=:name WHERE id=:id"), {"id": set_id, "name": name}) + if question_ids is not None: + existing = conn.execute(text("SELECT question_id FROM question_set_questions WHERE set_id=:sid"), {"sid": set_id}).fetchall() + existing_ids = [r[0] for r in existing] + new_ids = [str(q) for q in question_ids] + for qid in set(existing_ids) - set(new_ids): + conn.execute(text("DELETE FROM question_set_questions WHERE set_id=:sid AND question_id=:qid"), {"sid": set_id, "qid": qid}) + for qid in set(new_ids) - set(existing_ids): + conn.execute(text("INSERT INTO question_set_questions (set_id, question_id) VALUES (:sid, :qid)"), {"sid": set_id, "qid": qid}) + + @staticmethod + def delete(set_id: str) -> None: + engine = get_engine() + with engine.begin() as conn: + conn.execute(text("DELETE FROM question_set_questions WHERE set_id=:id"), {"id": set_id}) + conn.execute(text("DELETE FROM question_sets WHERE id=:id"), {"id": set_id}) diff --git a/models/test_result.py b/models/test_result.py new file mode 100644 index 0000000..5039410 --- /dev/null +++ b/models/test_result.py @@ -0,0 +1,57 @@ +from dataclasses import dataclass +from typing import Dict, Optional +import uuid +import json +import pandas as pd +from sqlalchemy import text + +from models.db_utils import get_engine + +@dataclass +class TestResult: + id: str + set_id: str + timestamp: str + results: Dict + + @staticmethod + def load_all() -> pd.DataFrame: + df = pd.read_sql("SELECT * FROM test_results", get_engine()) + if 'results' in df.columns: + df['results'] = df['results'].apply(lambda x: json.loads(x) if 
isinstance(x, str) else {}) + df['id'] = df['id'].astype(str) + return df + + @staticmethod + def save_df(df: pd.DataFrame) -> None: + df_to_save = df.copy() + if 'results' in df_to_save.columns: + df_to_save['results'] = df_to_save['results'].apply(lambda x: json.dumps(x) if isinstance(x, dict) else '{}') + engine = get_engine() + with engine.begin() as conn: + existing_ids = pd.read_sql('SELECT id FROM test_results', conn)['id'].astype(str).tolist() + incoming_ids = df_to_save['id'].astype(str).tolist() + for rid in set(existing_ids) - set(incoming_ids): + conn.execute(text('DELETE FROM test_results WHERE id=:id'), {'id': rid}) + for _, row in df_to_save.iterrows(): + params = row.to_dict() + if row['id'] in existing_ids: + conn.execute(text('''UPDATE test_results SET set_id=:set_id, timestamp=:timestamp, results=:results WHERE id=:id'''), params) + else: + conn.execute(text('''INSERT INTO test_results (id, set_id, timestamp, results) VALUES (:id, :set_id, :timestamp, :results)'''), params) + + @staticmethod + def add(set_id: str, results_data: Dict) -> str: + result_id = str(uuid.uuid4()) + engine = get_engine() + with engine.begin() as conn: + conn.execute( + text('INSERT INTO test_results (id, set_id, timestamp, results) VALUES (:id, :set_id, :timestamp, :results)'), + { + 'id': result_id, + 'set_id': set_id, + 'timestamp': results_data.get('timestamp', ''), + 'results': json.dumps(results_data) + } + ) + return result_id diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..5ee6477 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5db8850 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +streamlit>=1.28.0 +pandas>=1.5.0 +plotly>=5.0.0 +openai>=1.0.0 +sqlalchemy>=2.0.0 +pymysql>=1.0.0 +cryptography>=42.0.0 +# Note: uuid and configparser are built-in Python modules +# installa con pip install -r requirements.txt + diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..718566d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,17 @@ +import os, sys +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +import pytest +from sqlalchemy import create_engine +from models import db_utils, question, question_set + +@pytest.fixture(autouse=True) +def in_memory_db(monkeypatch): + engine = create_engine("sqlite:///:memory:") + # Patch get_engine in db_utils and imported references + monkeypatch.setattr(db_utils, "_engine", engine) + monkeypatch.setattr(db_utils, "get_engine", lambda: engine) + monkeypatch.setattr(question, "get_engine", lambda: engine) + monkeypatch.setattr(question_set, "get_engine", lambda: engine) + db_utils.init_db() + yield engine + engine.dispose() diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py new file mode 100644 index 0000000..eac42c5 --- /dev/null +++ b/tests/test_placeholder.py @@ -0,0 +1,4 @@ +import pytest + +def test_placeholder(): + assert True diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py new file mode 100644 index 0000000..18c3d78 --- /dev/null +++ b/tests/test_question_controller.py @@ -0,0 +1,18 @@ +from controllers import question_controller + + +def test_add_update_delete_question(): + qid = question_controller.add_question("Domanda?", "Risposta", "cat") + df = question_controller.load_questions() + assert qid in df["id"].values + + question_controller.update_question(qid, domanda="Nuova domanda", 
risposta_attesa="Nuova", categoria="newcat") + df2 = question_controller.load_questions() + row = df2[df2["id"] == qid].iloc[0] + assert row["domanda"] == "Nuova domanda" + assert row["risposta_attesa"] == "Nuova" + assert row["categoria"] == "newcat" + + question_controller.delete_question(qid) + df3 = question_controller.load_questions() + assert qid not in df3["id"].values diff --git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py new file mode 100644 index 0000000..a7b5950 --- /dev/null +++ b/tests/test_question_set_controller.py @@ -0,0 +1,22 @@ +from controllers import question_controller, question_set_controller + + +def test_create_update_delete_set(): + qid1 = question_controller.add_question("Q1", "A1") + qid2 = question_controller.add_question("Q2", "A2") + + set_id = question_set_controller.create_set("Set1", [qid1, qid2]) + sets = question_set_controller.load_sets() + row = sets[sets["id"] == set_id].iloc[0] + assert row["name"] == "Set1" + assert set(row["questions"]) == {qid1, qid2} + + question_set_controller.update_set(set_id, name="Set2", question_ids=[qid2]) + sets2 = question_set_controller.load_sets() + row2 = sets2[sets2["id"] == set_id].iloc[0] + assert row2["name"] == "Set2" + assert row2["questions"] == [qid2] + + question_set_controller.delete_set(set_id) + sets3 = question_set_controller.load_sets() + assert set_id not in sets3["id"].values diff --git a/view/api_configurazione.py b/view/api_configurazione.py new file mode 100644 index 0000000..d1e88e0 --- /dev/null +++ b/view/api_configurazione.py @@ -0,0 +1,266 @@ +import streamlit as st +import uuid +import pandas as pd + +from controllers.openai_controller import ( + test_api_connection, DEFAULT_MODEL, DEFAULT_ENDPOINT +) +from view.style_utils import add_page_header, add_section_title +from view.component_utils import create_card +from controllers.api_preset_controller import load_presets, save_presets, delete_preset + +add_page_header( + "Gestione Preset API", + icon="⚙️", + description="Crea, visualizza, testa ed elimina i preset di configurazione API per LLM." 
+) + +# Stato della sessione per la gestione del form di creazione/modifica preset +if "editing_preset" not in st.session_state: st.session_state.editing_preset = False +if "current_preset_edit_id" not in st.session_state: st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica +if "preset_form_data" not in st.session_state: st.session_state.preset_form_data = {} + +# Carica sempre i preset API dal database +st.session_state.api_presets = load_presets() + + +# Funzioni di callback per i pulsanti del form +def start_new_preset_edit(): + st.session_state.editing_preset = True + st.session_state.current_preset_edit_id = None # Indica nuovo preset + st.session_state.preset_form_data = { + "name": "", + "endpoint": DEFAULT_ENDPOINT, + "api_key": "", + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000 + } + +def start_existing_preset_edit(preset_id): + preset_to_edit = st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() + st.session_state.editing_preset = True + st.session_state.current_preset_edit_id = preset_id + st.session_state.preset_form_data = preset_to_edit.copy() + # Assicura che i campi numerici siano del tipo corretto per gli slider/number_input + st.session_state.preset_form_data["temperature"] = float(st.session_state.preset_form_data.get("temperature", 0.0)) + st.session_state.preset_form_data["max_tokens"] = int(st.session_state.preset_form_data.get("max_tokens", 1000)) + if "endpoint" not in st.session_state.preset_form_data: + st.session_state.preset_form_data["endpoint"] = DEFAULT_ENDPOINT + +def cancel_preset_edit(): + st.session_state.editing_preset = False + st.session_state.current_preset_edit_id = None + st.session_state.preset_form_data = {} + +def save_preset_from_form(): + """Salva un preset leggendo i valori direttamente dagli input della form.""" + # Recupera sempre i valori correnti dei widget dal session_state + preset_name = st.session_state.get("preset_name", "").strip() + endpoint = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + api_key = st.session_state.get("preset_api_key", "") + model = st.session_state.get("preset_model", DEFAULT_MODEL) + temperature = float( + st.session_state.get( + "preset_temperature", + st.session_state.preset_form_data.get("temperature", 0.0), + ) + ) + max_tokens = int( + st.session_state.get( + "preset_max_tokens", + st.session_state.preset_form_data.get("max_tokens", 1000), + ) + ) + + # Aggiorna il dizionario del form in sessione con i valori raccolti + st.session_state.preset_form_data.update( + { + "name": preset_name, + "endpoint": endpoint, + "api_key": api_key, + "model": model, + "temperature": temperature, + "max_tokens": max_tokens, + } + ) + + form_data = st.session_state.preset_form_data.copy() + + if not preset_name: + st.error("Il nome del preset non può essere vuoto.") + return + + current_id = st.session_state.current_preset_edit_id + presets_df = st.session_state.api_presets + + # Controlla se il nome del preset esiste già (escludendo il preset corrente se in modifica) + existing_names = presets_df["name"].tolist() + if current_id: + current_preset_original_name = presets_df[presets_df["id"] == current_id].iloc[0]["name"] + if preset_name != current_preset_original_name and preset_name in existing_names: + st.error(f"Un altro preset con nome '{preset_name}' esiste già.") + return + elif preset_name in existing_names: + st.error(f"Un preset con nome '{preset_name}' esiste già.") + return + + # Prepara i dati del preset 
da salvare + preset_data_to_save = { + "name": preset_name, # Usa il valore validato + "endpoint": form_data.get("endpoint"), + "api_key": form_data.get("api_key"), + "model": form_data.get("model"), + "temperature": float(form_data.get("temperature", 0.0)), + "max_tokens": int(form_data.get("max_tokens", 1000)) + } + + if current_id: # Modifica preset esistente + idx = presets_df.index[presets_df["id"] == current_id].tolist()[0] + for key, value in preset_data_to_save.items(): + presets_df.loc[idx, key] = value + st.success(f"Preset '{preset_name}' aggiornato con successo!") + else: # Crea nuovo preset + new_id = str(uuid.uuid4()) + preset_data_to_save["id"] = new_id + new_preset_df = pd.DataFrame([preset_data_to_save]) + presets_df = pd.concat([presets_df, new_preset_df], ignore_index=True) + st.success(f"Preset '{preset_name}' creato con successo!") + + st.session_state.api_presets = presets_df + save_presets(presets_df) + cancel_preset_edit() # Chiudi il form + +def delete_preset_callback(preset_id): + presets_df = st.session_state.api_presets + preset_name_to_delete = presets_df[presets_df["id"] == preset_id].iloc[0]["name"] + st.session_state.api_presets = presets_df[presets_df["id"] != preset_id] + save_presets(st.session_state.api_presets) + delete_preset(preset_id) + st.success(f"Preset '{preset_name_to_delete}' eliminato.") + if st.session_state.current_preset_edit_id == preset_id: + cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form + +# Sezione per visualizzare/modificare i preset +if st.session_state.editing_preset: + add_section_title("Modifica/Crea Preset API", icon="✏️") + form_data = st.session_state.preset_form_data + + with st.form(key="preset_form"): + # Usa un key specifico per il campo nome e aggiorna il form_data + form_data["name"] = st.text_input( + "Nome del Preset", + value=form_data.get("name", ""), + key="preset_name", # Key esplicita per il campo nome + help="Un nome univoco per questo preset." + ) + + # Campo chiave API con key esplicita + form_data["api_key"] = st.text_input( + "Chiave API", + value=form_data.get("api_key", ""), + type="password", + key="preset_api_key", # Key esplicita per la chiave API + help="La tua chiave API per il provider selezionato." 
+ ) + + # Campo endpoint con key esplicita + form_data["endpoint"] = st.text_input( + "Provider Endpoint", + value=form_data.get("endpoint", DEFAULT_ENDPOINT), + placeholder="https://api.openai.com/v1", + key="preset_endpoint", # Key esplicita per l'endpoint + help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" + ) + + # Modello sempre personalizzabile + form_data["model"] = st.text_input( + "Modello", + value=form_data.get("model", DEFAULT_MODEL), + placeholder="gpt-4o", + key="preset_model", # Key esplicita per il modello + help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" + ) + + form_data["temperature"] = st.slider( + "Temperatura", + 0.0, + 2.0, + float(form_data.get("temperature", 0.0)), + 0.1, + key="preset_temperature", + ) + form_data["max_tokens"] = st.number_input( + "Max Tokens", + min_value=50, + max_value=8000, + value=int(form_data.get("max_tokens", 1000)), + step=50, + key="preset_max_tokens", + ) + + # Campo Test Connessione e pulsanti di salvataggio/annullamento + # Pulsante Test Connessione + if st.form_submit_button("⚡ Testa Connessione API"): + # Usa direttamente i valori dal session_state per il test + api_key_to_test = st.session_state.get("preset_api_key", "") + endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) + + with st.spinner("Test in corso..."): + success, message = test_api_connection( + api_key=api_key_to_test, + endpoint=endpoint_to_test, + model=model_to_test, + temperature=form_data.get("temperature", 0.0), + max_tokens=form_data.get("max_tokens", 1000) + ) + if success: + st.success(message) + else: + st.error(message) + + # Pulsanti Salva e Annulla + cols_form_buttons = st.columns(2) + with cols_form_buttons[0]: + if st.form_submit_button("💾 Salva Preset", on_click=save_preset_from_form, type="primary", use_container_width=True): + pass # Il callback gestisce il salvataggio + with cols_form_buttons[1]: + if st.form_submit_button("❌ Annulla", on_click=cancel_preset_edit, use_container_width=True): + pass # Il callback gestisce il cambio di stato +else: + add_section_title("Preset API Salvati", icon="🗂️") + if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): + pass # Il callback gestisce il cambio di stato + + if st.session_state.api_presets.empty: + st.info("Nessun preset API salvato. 
Clicca su 'Crea Nuovo Preset' per iniziare.") + else: + for index, preset in st.session_state.api_presets.iterrows(): + with st.container(): + st.markdown(f"#### {preset['name']}") + cols_preset_details = st.columns([3, 1, 1]) + with cols_preset_details[0]: + st.caption(f"Modello: {preset.get('model', 'N/A')}") + st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") + with cols_preset_details[1]: + if st.button("✏️ Modifica", key=f"edit_{preset['id']}", on_click=start_existing_preset_edit, args=(preset['id'],), use_container_width=True): + pass + with cols_preset_details[2]: + if st.button("🗑️ Elimina", key=f"delete_{preset['id']}", on_click=delete_preset_callback, args=(preset['id'],), type="secondary", use_container_width=True): + pass + st.divider() + +# Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) +if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui + st.success(st.session_state.preset_applied_message) + del st.session_state.preset_applied_message + +if "preset_saved_message" in st.session_state: + st.success(st.session_state.preset_saved_message) + del st.session_state.preset_saved_message + +if "preset_deleted_message" in st.session_state: + st.success(st.session_state.preset_deleted_message) + del st.session_state.preset_deleted_message + diff --git a/view/component_utils.py b/view/component_utils.py new file mode 100644 index 0000000..0d602cf --- /dev/null +++ b/view/component_utils.py @@ -0,0 +1,177 @@ +import streamlit as st + + +def create_card(title: str, content: str, icon: str | None = None, + is_success: bool = False, is_warning: bool = False, is_error: bool = False): + """Crea una scheda stilizzata con un contenuto personalizzabile.""" + color = "#4F6AF0" + bg_color = "white" + shadow_color = "rgba(79, 106, 240, 0.15)" + + if is_success: + color = "#28a745" + bg_color = "#f8fff9" + shadow_color = "rgba(40, 167, 69, 0.15)" + elif is_warning: + color = "#ffc107" + bg_color = "#fffef8" + shadow_color = "rgba(255, 193, 7, 0.15)" + elif is_error: + color = "#dc3545" + bg_color = "#fff8f8" + shadow_color = "rgba(220, 53, 69, 0.15)" + + icon_text = f'{icon}' if icon else "" + + st.markdown( + f""" + + +
+        <div style="background-color: {bg_color}; border-left: 4px solid {color};
+                    border-radius: 8px; padding: 1rem 1.25rem; margin-bottom: 1rem;
+                    box-shadow: 0 2px 10px {shadow_color};">
+            <div style="font-weight: 600; color: {color}; margin-bottom: 0.4rem;">{icon_text}{title}</div>
+            <div>{content}</div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def create_metrics_container(metrics_data: list[dict]):
+    """Crea un contenitore con metriche ben stilizzate."""
+    metrics_html = '<div style="display: flex; gap: 1rem; flex-wrap: wrap;">'
+    for metric in metrics_data:
+        icon_html = (
+            f'<div style="font-size: 1.6rem;">{metric.get("icon", "")}</div>'
+            if metric.get("icon")
+            else ""
+        )
+        unit = metric.get("unit", "")
+        unit_html = f'<span style="font-size: 0.8rem;"> {unit}</span>' if unit else ""
+        help_text = f'title="{metric.get("help")}"' if metric.get("help") else ""
+
+        metrics_html += f"""
+        <div {help_text} style="flex: 1; min-width: 150px; text-align: center;
+             background-color: white; border-radius: 8px; padding: 1rem;
+             box-shadow: 0 2px 8px rgba(79, 106, 240, 0.15);">
+            {icon_html}
+            <div style="font-size: 1.4rem; font-weight: 700;">{metric['value']}{unit_html}</div>
+            <div style="font-size: 0.85rem; color: #555;">{metric['label']}</div>
+        </div>
+        """
+
+    metrics_html += '</div>
' + st.markdown(metrics_html, unsafe_allow_html=True) + diff --git a/view/esecuzione_test.py b/view/esecuzione_test.py new file mode 100644 index 0000000..29fd0de --- /dev/null +++ b/view/esecuzione_test.py @@ -0,0 +1,239 @@ +import streamlit as st +import pandas as pd +import time +from datetime import datetime + +from controllers.question_controller import load_questions +from controllers.question_set_controller import load_sets +from controllers.test_controller import add_result, load_results +from controllers.api_preset_controller import load_presets +from controllers.openai_controller import ( + evaluate_answer, generate_example_answer_with_llm +) +from view.style_utils import add_page_header, add_section_title +from view.component_utils import create_card + + +# === FUNZIONI DI CALLBACK === + +def set_llm_mode_callback(): + """Funzione di callback: imposta la modalità LLM""" + if st.session_state.test_mode != "Valutazione Automatica con LLM": + st.session_state.test_mode = "Valutazione Automatica con LLM" + st.session_state.mode_changed = True + + +def run_llm_test_callback(): + """Funzione di callback: esegue il test LLM""" + st.session_state.run_llm_test = True + + +# === Inizializzazione delle variabili di stato === +if 'test_mode' not in st.session_state: + st.session_state.test_mode = "Valutazione Automatica con LLM" +if 'mode_changed' not in st.session_state: + st.session_state.mode_changed = False +if 'run_llm_test' not in st.session_state: + st.session_state.run_llm_test = False + +# Gestisce il cambio di modalità +if st.session_state.mode_changed: + st.session_state.mode_changed = False + st.rerun() + +add_page_header( + "Esecuzione Test", + icon="🧪", + description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." +) + +# Carica sempre i dati necessari dal database +st.session_state.api_presets = load_presets() +st.session_state.question_sets = load_sets() +st.session_state.questions = load_questions() + +if st.session_state.api_presets.empty: + st.error( + "Nessun preset API configurato. Vai alla pagina 'Gestione Preset API' per crearne almeno uno prima di eseguire i test.") + st.stop() + +# Controlla se ci sono set di domande disponibili +if st.session_state.question_sets.empty: + st.warning("Nessun set di domande disponibile. 
Crea dei set di domande prima di eseguire i test.") + st.stop() + + +# Ottieni testo della domanda e risposta attesa per ID +def get_question_data(question_id): + if 'questions' in st.session_state and not st.session_state.questions.empty: + question_row = st.session_state.questions[st.session_state.questions['id'] == str(question_id)] + if not question_row.empty: + # Assicurati che i nomi delle colonne ('domanda', 'risposta_attesa') coincidano con quelli + # forniti da question_controller.load_questions() + q = question_row.iloc[0].get('domanda', question_row.iloc[0].get('question', '')) + a = question_row.iloc[0].get('risposta_attesa', question_row.iloc[0].get('expected_answer', '')) + + # Verifica che domanda e risposta non siano vuote + if not q or not isinstance(q, str) or q.strip() == "": + st.error(f"La domanda con ID {question_id} è vuota o non valida.") + return None + + if not a or not isinstance(a, str) or a.strip() == "": + st.warning(f"La risposta attesa per la domanda con ID {question_id} è vuota o non valida.") + # Continuiamo comunque ma con una risposta vuota + a = "Risposta non disponibile" + + return {'question': q, 'expected_answer': a} + return None + + +# Seleziona set di domande per il test +add_section_title("Seleziona Set di Domande", icon="📚") +set_options = {} +if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + for _, row in st.session_state.question_sets.iterrows(): + if 'questions' in row and row['questions']: + set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" + +if not set_options: + st.warning("Nessun set di domande con domande associate. Creane uno in 'Gestione Set di Domande'.") + st.stop() + +selected_set_id = st.selectbox( + "Seleziona un set di domande", + options=list(set_options.keys()), + format_func=lambda x: set_options[x], + key="select_question_set_for_test" +) + +selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] +questions_in_set = selected_set['questions'] + + +# --- Opzioni API basate su Preset --- +add_section_title("Opzioni API basate su Preset", icon="🛠️") + +preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} +preset_display_names = list(preset_names_to_id.keys()) + + +def get_preset_config_by_name(name): + preset_id = preset_names_to_id.get(name) + if preset_id: + return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() + return None + + +# Seleziona preset per generazione risposta (comune a entrambe le modalità) +generation_preset_name = st.selectbox( + "Seleziona Preset per Generazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="generation_preset_select", + help="Il preset API utilizzato per generare la risposta alla domanda." +) +st.session_state.selected_generation_preset_name = generation_preset_name + +# Seleziona preset per valutazione (solo per modalità LLM) +if st.session_state.test_mode == "Valutazione Automatica con LLM": + evaluation_preset_name = st.selectbox( + "Seleziona Preset per Valutazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="evaluation_preset_select", + help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." 
+ ) + st.session_state.selected_evaluation_preset_name = evaluation_preset_name + +show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) + +# --- Logica di Esecuzione Test --- +test_mode_selected = st.session_state.test_mode + +if test_mode_selected == "Valutazione Automatica con LLM": + st.header("Esecuzione: Valutazione Automatica con LLM") + + # Pulsante che utilizza la funzione di callback + st.button( + "🚀 Esegui Test con LLM", + key="run_llm_test_btn", + on_click=run_llm_test_callback + ) + + # Gestisce l'esecuzione del test + if st.session_state.run_llm_test: + st.session_state.run_llm_test = False # Resetta lo stato + + gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) + eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) + + if not gen_preset_config or not eval_preset_config: + st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") + else: + with st.spinner("Generazione risposte e valutazione LLM in corso..."): + results = {} + for q_id in questions_in_set: + q_data = get_question_data(q_id) + if q_data: + # Genera risposta di esempio usando LLM + generation_output = generate_example_answer_with_llm(q_data['question'], + client_config=gen_preset_config, + show_api_details=show_api_details) + actual_answer = generation_output["answer"] + generation_api_details = generation_output["api_details"] + + if actual_answer is None: + # Gestione errore generazione + results[q_id] = { + 'question': q_data['question'], + 'expected_answer': q_data['expected_answer'], + 'actual_answer': "Errore Generazione", + 'evaluation': {'score': 0, 'explanation': 'Generazione fallita'}, + 'generation_api_details': generation_api_details + # Salva anche se la generazione fallisce + } + continue + + evaluation = evaluate_answer(q_data['question'], q_data['expected_answer'], actual_answer, + client_config=eval_preset_config, + show_api_details=show_api_details) + results[q_id] = { + 'question': q_data['question'], + 'expected_answer': q_data['expected_answer'], + 'actual_answer': actual_answer, + 'evaluation': evaluation, # Questo conterrà i dettagli API della VALUTAZIONE + 'generation_api_details': generation_api_details # Dettagli API della GENERAZIONE + } + + # Salva e visualizza risultati + if results: + avg_score = sum(r['evaluation']['score'] for r in results.values()) / len(results) if results else 0 + result_data = { + 'set_name': selected_set['name'], + 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'avg_score': avg_score, + 'sample_type': 'Generata da LLM', + 'method': 'LLM', + 'generation_preset': gen_preset_config['name'], + 'evaluation_preset': eval_preset_config['name'], + 'questions': results + } + result_id = add_result(selected_set_id, result_data) + st.session_state.results = load_results() + st.success(f"Test LLM completato! Punteggio medio: {avg_score:.2f}%") + + # Visualizzazione risultati dettagliati + st.subheader("Risultati Dettagliati") + for q_id, result in results.items(): + with st.expander( + f"Domanda: {result['question'][:50]}..." 
+ ): + col1, col2 = st.columns(2) + with col1: + st.write("**Domanda:**", result['question']) + st.write("**Risposta Attesa:**", result['expected_answer']) + with col2: + st.write("**Risposta Generata:**", result['actual_answer']) + st.write("**Punteggio:**", f"{result['evaluation']['score']:.1f}%") + st.write("**Valutazione:**", result['evaluation']['explanation']) + diff --git a/view/gestione_domande.py b/view/gestione_domande.py new file mode 100644 index 0000000..847b386 --- /dev/null +++ b/view/gestione_domande.py @@ -0,0 +1,292 @@ +import streamlit as st +import pandas as pd +import json + +from controllers.question_controller import ( + load_questions, + add_question, + update_question, + delete_question, + import_questions_from_file, +) +from view.style_utils import add_page_header, add_section_title +from view.component_utils import create_card +from view.session_state import ensure_keys + + +# === FUNZIONI DI CALLBACK === + +def save_question_callback(question_id, edited_question, edited_answer, edited_category): + """Funzione di callback: salva le modifiche alla domanda""" + if update_question(question_id, domanda=edited_question, risposta_attesa=edited_answer, + categoria=edited_category): + st.session_state.save_success_message = "Domanda aggiornata con successo!" + st.session_state.save_success = True + # Aggiorna le domande in session_state per riflettere la modifica + st.session_state.questions.loc[st.session_state.questions['id'] == question_id, 'categoria'] = edited_category + st.session_state.trigger_rerun = True + else: + st.session_state.save_error_message = "Impossibile aggiornare la domanda." + st.session_state.save_error = True + + +def delete_question_callback(question_id): + """Funzione di callback: elimina la domanda""" + delete_question(question_id) + st.session_state.delete_success_message = "Domanda eliminata con successo!" 
+ st.session_state.delete_success = True + st.session_state.trigger_rerun = True + + +def import_questions_callback(): + """Funzione di callback: importa le domande""" + if 'uploaded_file_content' in st.session_state and st.session_state.uploaded_file_content is not None: + success, message = import_questions_from_file(st.session_state.uploaded_file_content) + + if success: + st.session_state.import_success_message = message + st.session_state.import_success = True + # Ricarica le domande dal database per aggiornare lo stato + st.session_state.questions = load_questions() + st.session_state.trigger_rerun = True + else: + st.session_state.import_error_message = message + st.session_state.import_error = True + + +# === FUNZIONI DI DIALOGO === + +@st.dialog("Conferma Eliminazione") +def confirm_delete_question_dialog(question_id, question_text): + """Dialogo di conferma per l'eliminazione della domanda""" + st.write(f"Sei sicuro di voler eliminare questa domanda?") + st.write(f"**Domanda:** {question_text[:100]}...") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + delete_question_callback(question_id) + st.rerun() + + with col2: + if st.button("No, Annulla", use_container_width=True): + st.rerun() + + +# === Inizializzazione delle variabili di stato === +ensure_keys({ + "save_success": False, + "save_error": False, + "delete_success": False, + "add_success": False, + "import_success": False, + "import_error": False, + "trigger_rerun": False, +}) + +# Carica sempre le domande dal database per la visualizzazione +st.session_state.questions = load_questions() + +# Gestisce la logica di rerun +if st.session_state.trigger_rerun: + st.session_state.trigger_rerun = False + st.rerun() + +# Mostra i messaggi di stato +if st.session_state.save_success: + st.success(st.session_state.get('save_success_message', 'Operazione completata con successo!')) + st.session_state.save_success = False + +if st.session_state.save_error: + st.error(st.session_state.get('save_error_message', 'Si è verificato un errore.')) + st.session_state.save_error = False + +if st.session_state.delete_success: + st.success(st.session_state.get('delete_success_message', 'Eliminazione completata con successo!')) + st.session_state.delete_success = False + +if st.session_state.add_success: + st.success(st.session_state.get('add_success_message', 'Domanda aggiunta con successo!')) + st.session_state.add_success = False + +if st.session_state.import_success: + st.success(st.session_state.get('import_success_message', 'Importazione completata con successo!')) + st.session_state.import_success = False + +if st.session_state.import_error: + st.error(st.session_state.get('import_error_message', 'Errore durante l\'importazione.')) + st.session_state.import_error = False + +# Aggiungi un'intestazione stilizzata +add_page_header( + "Gestione Domande", + icon="📋", + description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." 
+) + +# Scheda per diverse funzioni di gestione delle domande +tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) + +# Scheda Visualizza e Modifica Domande +with tabs[0]: + st.header("Visualizza e Modifica Domande") + + if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df = st.session_state.questions + # Assicurati che la colonna 'categoria' esista, altrimenti aggiungila con valori vuoti + if 'categoria' not in questions_df.columns: + questions_df['categoria'] = "" + else: + # Riempi i valori NaN o None nella colonna 'categoria' con una stringa vuota o 'N/A' + # per assicurare che il filtro funzioni correttamente e per la visualizzazione. + questions_df['categoria'] = questions_df['categoria'].fillna('N/A') + + # Ottieni le categorie uniche per il filtro, includendo un'opzione per mostrare tutto + # Converti esplicitamente in stringa per evitare problemi con tipi misti e aggiungi 'Tutte le categorie' + unique_categories = sorted(list(questions_df['categoria'].astype(str).unique())) + unique_categories.insert(0, "Tutte le categorie") + + # Crea il selettore per la categoria + selected_category = st.selectbox( + "Filtra per categoria:", + options=unique_categories, + index=0 # Imposta "Tutte le categorie" come predefinito + ) + + # Filtra il DataFrame in base alla categoria selezionata + if selected_category == "Tutte le categorie": + filtered_questions_df = questions_df + else: + filtered_questions_df = questions_df[questions_df['categoria'] == selected_category] + + if not filtered_questions_df.empty: + for idx, row in filtered_questions_df.iterrows(): + # Usa .get('categoria', 'N/A') per una gestione sicura se 'categoria' non fosse presente o fosse NaN dopo il filtro + # Anche se abbiamo gestito i NaN prima, è una buona pratica per la robustezza. + category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' + with st.expander( + f"Domanda: {row['domanda'][:100]}... (Categoria: {category_display})" + ): + col1, col2 = st.columns([3, 1]) + + with col1: + edited_question = st.text_area( + f"Modifica Domanda {idx + 1}", + value=row['domanda'], + key=f"q_edit_{row['id']}" + ) + + edited_answer = st.text_area( + f"Modifica Risposta Attesa {idx + 1}", + value=row['risposta_attesa'], + key=f"a_edit_{row['id']}" + ) + + edited_category_value = row.get('categoria', '') + edited_category = st.text_input( + f"Modifica Categoria {idx + 1}", + value=edited_category_value, + key=f"c_edit_{row['id']}" + ) + + with col2: + # Pulsante Aggiorna con callback + st.button( + "Salva Modifiche", + key=f"save_{row['id']}", + on_click=save_question_callback, + args=(row['id'], edited_question, edited_answer, edited_category) + ) + + # Pulsante Elimina con dialog di conferma + if st.button( + "Elimina Domanda", + key=f"delete_{row['id']}", + type="secondary" + ): + confirm_delete_question_dialog(row['id'], row['domanda']) + else: + st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") + + else: + st.info("Nessuna domanda disponibile. 
Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") + +# Scheda Aggiungi Domande +with tabs[1]: + st.header("Aggiungi Nuova Domanda") + + with st.form("add_question_form"): + domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") + risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") + categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") + + submitted = st.form_submit_button("Aggiungi Domanda") + + if submitted: + if domanda and risposta_attesa: + # Passa la categoria, che può essere una stringa vuota se non inserita + question_id = add_question(domanda=domanda, risposta_attesa=risposta_attesa, + categoria=categoria) + st.session_state.add_success_message = f"Domanda aggiunta con successo con ID: {question_id}" + st.session_state.add_success = True + st.session_state.trigger_rerun = True + st.rerun() + else: + st.error("Sono necessarie sia la domanda che la risposta attesa.") + +# Scheda Importa da File +with tabs[2]: + st.header("Importa Domande da File") + + st.write(""" + Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). + + ### Formato File: + - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + + ### Esempio CSV: + ```csv + domanda,risposta_attesa,categoria + "Quanto fa 2+2?","4","Matematica Base" + "Qual è la capitale della Francia?","Parigi","Geografia" + "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" + ``` + + ### Esempio JSON: + ```json + [ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } + ] + ``` + """) + + uploaded_file = st.file_uploader("Scegli un file", type=["csv", "json"]) + + if uploaded_file is not None: + # Salva il file in session_state per l'uso da parte della callback + st.session_state.uploaded_file_content = uploaded_file + + # Pulsante che utilizza la funzione di callback + st.button( + "Importa Domande", + key="import_questions_btn", + on_click=import_questions_callback + ) diff --git a/view/gestione_set.py b/view/gestione_set.py new file mode 100644 index 0000000..772b37f --- /dev/null +++ b/view/gestione_set.py @@ -0,0 +1,379 @@ +import streamlit as st +from controllers.question_set_controller import ( + load_sets, + create_set, +) +from controllers.question_controller import load_questions +from view.style_utils import add_page_header, add_section_title +from view.component_utils import create_card, create_metrics_container +from view.session_state import ensure_keys +from view.set_helpers import ( + save_set_callback, + delete_set_callback, + confirm_delete_set_dialog, + import_set_callback, + get_question_text, + get_question_category, + mark_expander_open, + create_save_set_callback, + create_delete_set_callback, +) + + +ensure_keys({ + "save_set_success": False, + "save_set_error": False, + "delete_set_success": False, + "create_set_success": 
False, + "import_set_success": False, + "import_set_error": False, + "trigger_rerun": False, + "question_checkboxes": {}, + "newly_selected_questions": {}, + "set_expanders": {}, +}) + +if st.session_state.trigger_rerun: + st.session_state.trigger_rerun = False + st.rerun() + +if st.session_state.save_set_success: + st.success(st.session_state.get('save_set_success_message', 'Set aggiornato con successo!')) + st.session_state.save_set_success = False + +if st.session_state.save_set_error: + st.error(st.session_state.get('save_set_error_message', 'Errore durante l\'aggiornamento del set.')) + st.session_state.save_set_error = False + +if st.session_state.delete_set_success: + st.success(st.session_state.get('delete_set_success_message', 'Set eliminato con successo!')) + st.session_state.delete_set_success = False + +if st.session_state.create_set_success: + st.success(st.session_state.get('create_set_success_message', 'Set creato con successo!')) + st.session_state.create_set_success = False + +if st.session_state.import_set_success: + st.success(st.session_state.get('import_set_success_message', 'Importazione completata con successo!')) + st.session_state.import_set_success = False + +if st.session_state.import_set_error: + st.error(st.session_state.get('import_set_error_message', 'Errore durante l\'importazione.')) + st.session_state.import_set_error = False + +# Inizializza sempre i dati caricandoli dal database +st.session_state.questions = load_questions() +st.session_state.question_sets = load_sets() + +# Assicurati che esista lo stato degli expander per ogni set +if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + current_set_ids = st.session_state.question_sets['id'].tolist() + # Rimuovi stati per set non più presenti + for sid in list(st.session_state.set_expanders.keys()): + if sid not in current_set_ids: + del st.session_state.set_expanders[sid] + # Aggiungi stato predefinito per nuovi set + for sid in current_set_ids: + st.session_state.set_expanders.setdefault(sid, False) + +# Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN +if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df_temp = st.session_state.questions + if 'categoria' not in questions_df_temp.columns: + questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante + questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN + st.session_state.questions = questions_df_temp + +# Aggiungi un'intestazione stilizzata +add_page_header( + "Gestione Set di Domande", + icon="📚", + description="Organizza le tue domande in set per test e valutazioni" +) + +# Schede per diverse funzioni di gestione dei set +tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) + + +# Funzione per ottenere il testo della domanda tramite ID + +# Scheda Visualizza e Modifica Set +with tabs[0]: + st.header("Visualizza e Modifica Set di Domande") + + questions_ready = ('questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns) + sets_ready = 'question_sets' in st.session_state + + if not questions_ready: + st.warning( + "Dati delle domande (incluse categorie) non completamente caricati. Alcune funzionalità potrebbero essere limitate. 
Vai a 'Gestione Domande'.") + # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti + unique_categories_for_filter = [] + selected_categories = [] + else: + questions_df = st.session_state.questions + # Ottieni categorie uniche per il filtro, escludendo 'N/A' se si preferisce non mostrarlo come opzione selezionabile + # o gestendolo specificamente. Per ora, includiamo tutto. + unique_categories_for_filter = sorted(list(questions_df['categoria'].astype(str).unique())) + if not unique_categories_for_filter: + st.info("Nessuna categoria definita nelle domande esistenti per poter filtrare.") + + selected_categories = st.multiselect( + "Filtra per categorie (mostra i set che contengono almeno una domanda da OGNI categoria selezionata):", + options=unique_categories_for_filter, + default=[], + key="filter_categories", + ) + + if sets_ready and not st.session_state.question_sets.empty: + question_sets_df = st.session_state.question_sets + display_sets_df = question_sets_df.copy() # Inizia con tutti i set + + if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti + filtered_set_indices = [] + for idx, set_row in question_sets_df.iterrows(): + question_ids_in_set = set_row.get('questions', []) + if not isinstance(question_ids_in_set, list): + question_ids_in_set = [] + + if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro + continue + + categories_present_in_set = set() + for q_id in question_ids_in_set: + category = get_question_category(str(q_id), questions_df) + categories_present_in_set.add(category) + + # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata + if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): + filtered_set_indices.append(idx) + + display_sets_df = question_sets_df.loc[filtered_set_indices] + + if display_sets_df.empty and selected_categories: + st.info( + f"Nessun set trovato che contenga domande da tutte le categorie selezionate: {', '.join(selected_categories)}.") + elif display_sets_df.empty and not selected_categories: + st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") + + for idx, row in display_sets_df.iterrows(): + exp_key = f"set_expander_{row['id']}" + if exp_key not in st.session_state.set_expanders: + st.session_state.set_expanders[exp_key] = False + + with st.expander( + f"Set: {row['name']}", + expanded=st.session_state.set_expanders.get(exp_key, False), + ): + col1, col2 = st.columns([3, 1]) + + with col1: + edited_name = st.text_input( + f"Nome Set", + value=row['name'], + key=f"set_name_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + + st.subheader("Domande in questo Set") + current_question_ids_in_set = row.get('questions', []) + if not isinstance(current_question_ids_in_set, list): + current_question_ids_in_set = [] + + if row['id'] not in st.session_state.question_checkboxes: + st.session_state.question_checkboxes[row['id']] = {} + + if current_question_ids_in_set: + for q_id in current_question_ids_in_set: + q_text = get_question_text(str(q_id)) + q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' + display_text = f"{q_text} (Categoria: {q_cat})" + + # 使用回调来更新checkbox状态 + checkbox_value = st.checkbox( + display_text, + value=True, + key=f"qcheck_{row['id']}_{q_id}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value + else: + st.info("Nessuna domanda in questo set.") + + st.subheader("Aggiungi Domande al Set") + + # 初始化新选择的问题状态 + if row['id'] not in st.session_state.newly_selected_questions: + st.session_state.newly_selected_questions[row['id']] = [] + + if questions_ready: + all_questions_df = st.session_state.questions + available_questions_df = all_questions_df[ + ~all_questions_df['id'].astype(str).isin( + [str(q_id) for q_id in current_question_ids_in_set]) + ] + + if not available_questions_df.empty: + question_dict_for_multiselect = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" for q_id, q_text in + zip(available_questions_df['id'].astype(str), available_questions_df['domanda']) + } + newly_selected_questions_ids = st.multiselect( + "Seleziona domande da aggiungere", + options=list(question_dict_for_multiselect.keys()), + format_func=lambda x: question_dict_for_multiselect.get(x, x), + key=f"add_q_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids + else: + st.info("Nessuna altra domanda disponibile da aggiungere.") + else: + st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") + + with col2: + st.button( + "Salva Modifiche", + key=f"save_set_{row['id']}", + on_click=create_save_set_callback(row['id'], exp_key) + ) + + # Pulsante Elimina con dialog di conferma + if st.button( + "Elimina Set", + key=f"delete_set_{row['id']}", + type="secondary" + ): + mark_expander_open(exp_key) + confirm_delete_set_dialog(row['id'], row['name']) + + # Lo stato dell'expander viene aggiornato tramite i callback + + elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): + st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") + +# Scheda Crea Nuovo Set +with tabs[1]: + st.header("Crea Nuovo Set di Domande") + + with st.form("create_set_form"): + set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") + + selected_qs_for_new_set = [] + questions_ready_for_creation = ('questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns) + + if questions_ready_for_creation: + all_questions_df_creation = st.session_state.questions + question_dict_for_creation = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" for q_id, q_text in + zip(all_questions_df_creation['id'].astype(str), all_questions_df_creation['domanda']) + } + + selected_qs_for_new_set = st.multiselect( + "Seleziona domande per questo set", + options=list(question_dict_for_creation.keys()), + format_func=lambda x: question_dict_for_creation.get(x, x), + key="create_set_questions", + ) + else: + st.info( + "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). Vai a 'Gestione Domande' per aggiungere/caricare domande.") + + submitted = st.form_submit_button("Crea Set") + + if submitted: + if set_name: + set_id = create_set(set_name, [str(q_id) for q_id in selected_qs_for_new_set]) + st.session_state.create_set_success_message = f"Set di domande creato con successo con ID: {set_id}" + st.session_state.create_set_success = True + st.session_state.trigger_rerun = True + st.rerun() + else: + st.error("Il nome del set è obbligatorio.") + +# Scheda Importa da File +with tabs[2]: + st.header("Importa Set da File") + + st.write(""" + Carica un file JSON o CSV contenente uno o più set di domande. + + ### Formato File JSON per Set Multipli: + ```json + [ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } + ] + ``` + + ### Formato CSV: + Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` + (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e + ``categoria``. 
+ ```csv + name,id,domanda,risposta_attesa,categoria + Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia + Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia + Matematica Base,3,Quanto fa 2+2?,4,Matematica + Matematica Base,4,Quanto fa 10*4?,40,Matematica + ``` + + ### Note Importanti: + - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente + - Se un set con lo stesso nome esiste già, verrà saltato + - Solo le domande nuove verranno aggiunte al database + - Le domande esistenti verranno referenziate nei nuovi set + """) + + uploaded_file = st.file_uploader("Scegli un file", type=["json", "csv"]) + + if uploaded_file is not None: + st.session_state.uploaded_file_content_set = uploaded_file + st.button( + "Importa Set", + key="import_set_btn", + on_click=import_set_callback + ) + + diff --git a/view/session_state.py b/view/session_state.py new file mode 100644 index 0000000..0301621 --- /dev/null +++ b/view/session_state.py @@ -0,0 +1,12 @@ +import streamlit as st + + +def ensure_keys(defaults: dict) -> None: + """Garantisce la presenza delle chiavi in ``st.session_state``. + + Args: + defaults: Dizionario con chiavi e valori da impostare se mancanti. + """ + for key, value in defaults.items(): + st.session_state.setdefault(key, value) + diff --git a/view/set_helpers.py b/view/set_helpers.py new file mode 100644 index 0000000..3d6686e --- /dev/null +++ b/view/set_helpers.py @@ -0,0 +1,119 @@ +import streamlit as st + +from controllers.question_set_controller import update_set, delete_set, import_sets_from_file +from controllers.question_controller import load_questions + + +def save_set_callback(set_id: str, edited_name: str, question_options_checkboxes: dict, newly_selected_questions_ids: list[str]): + kept_questions_ids = [q_id for q_id, keep in question_options_checkboxes.items() if keep] + updated_questions_ids = list(set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids])) + + if update_set(set_id, edited_name, updated_questions_ids): + st.session_state.save_set_success_message = "Set di domande aggiornato con successo!" + st.session_state.save_set_success = True + st.session_state.trigger_rerun = True + else: + st.session_state.save_set_error_message = "Impossibile aggiornare il set di domande." + st.session_state.save_set_error = True + + +def delete_set_callback(set_id: str): + delete_set(set_id) + st.session_state.delete_set_success_message = "Set di domande eliminato con successo!" 
+ st.session_state.delete_set_success = True + st.session_state.trigger_rerun = True + + +@st.dialog("Conferma Eliminazione") +def confirm_delete_set_dialog(set_id: str, set_name: str): + """Dialog di conferma per l'eliminazione del set di domande""" + st.write(f"Sei sicuro di voler eliminare il set '{set_name}'?") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + delete_set_callback(set_id) + st.rerun() + + with col2: + if st.button("No, Annulla", use_container_width=True): + st.rerun() + + +def import_set_callback(): + """Importa uno o più set di domande da file JSON o CSV.""" + + st.session_state.import_set_success = False + st.session_state.import_set_error = False + st.session_state.import_set_success_message = "" + st.session_state.import_set_error_message = "" + + uploaded_file = st.session_state.get("uploaded_file_content_set") + result = import_sets_from_file(uploaded_file) + + if result["success"]: + st.session_state.import_set_success = True + st.session_state.import_set_success_message = result["success_message"] + if result.get("questions_df") is not None: + st.session_state.questions = result["questions_df"] + if result.get("sets_df") is not None: + st.session_state.question_sets = result["sets_df"] + st.session_state.uploaded_file_content_set = None + else: + st.session_state.import_set_error = True + st.session_state.import_set_error_message = result["error_message"] + + for warn in result.get("warnings", []): + st.warning(warn) + + st.session_state.trigger_rerun = True + + +def get_question_text(question_id: str) -> str: + """Ritorna il testo della domanda dato il suo ID.""" + if "questions" in st.session_state and not st.session_state.questions.empty: + if "domanda" not in st.session_state.questions.columns: + st.session_state.questions = load_questions() + if "domanda" not in st.session_state.questions.columns: + return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" + + question_row = st.session_state.questions[st.session_state.questions["id"] == str(question_id)] + if not question_row.empty: + return question_row.iloc[0]["domanda"] + return f"ID Domanda: {question_id} (non trovata o dati non caricati)" + + +def get_question_category(question_id: str, questions_df): + """Ritorna la categoria di una domanda dato il suo ID.""" + if questions_df is not None and not questions_df.empty and "categoria" in questions_df.columns: + question_row = questions_df[questions_df["id"] == str(question_id)] + if not question_row.empty: + return question_row.iloc[0]["categoria"] + return "N/A" + + +def mark_expander_open(exp_key: str): + """Segna l'expander come aperto nello stato di sessione""" + if "set_expanders" in st.session_state: + st.session_state.set_expanders[exp_key] = True + + +def create_save_set_callback(set_id: str, exp_key: str): + def callback(): + mark_expander_open(exp_key) + edited_name = st.session_state.get(f"set_name_{set_id}", "") + question_options_checkboxes = st.session_state.question_checkboxes.get(set_id, {}) + newly_selected_questions_ids = st.session_state.newly_selected_questions.get(set_id, []) + + save_set_callback(set_id, edited_name, question_options_checkboxes, newly_selected_questions_ids) + + return callback + + +def create_delete_set_callback(set_id: str): + def callback(): + delete_set_callback(set_id) + + return callback diff --git a/view/style_utils.py b/view/style_utils.py new file mode 100644 index 
0000000..032f2bd --- /dev/null +++ b/view/style_utils.py @@ -0,0 +1,187 @@ +import streamlit as st + + +def add_global_styles(): + """Aggiunge stili globali all'applicazione.""" + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + +def add_page_header(title: str, icon: str = "💡", description: str | None = None): + """Aggiunge un'intestazione di pagina stilizzata.""" + add_global_styles() + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + st.markdown( + f""" + +
+        <h1>{icon} {title}</h1>
+        {f'<p>{description}</p>' if description else ''}
+        """,
+        unsafe_allow_html=True,
+    )
+
+
+def add_section_title(title: str, icon: str | None = None):
+    """Aggiunge un titolo di sezione stilizzato."""
+    icon_text = f"{icon} " if icon else ""
+    st.markdown(
+        f"<h3>{icon_text}{title}</h3>
", + unsafe_allow_html=True, + ) + diff --git a/view/ui_utils.py b/view/ui_utils.py new file mode 100644 index 0000000..83fbd66 --- /dev/null +++ b/view/ui_utils.py @@ -0,0 +1,10 @@ +from .style_utils import add_global_styles, add_page_header, add_section_title +from .component_utils import create_card, create_metrics_container + +__all__ = [ + "add_global_styles", + "add_page_header", + "add_section_title", + "create_card", + "create_metrics_container", +] diff --git a/view/visualizza_risultati.py b/view/visualizza_risultati.py new file mode 100644 index 0000000..d1c18ff --- /dev/null +++ b/view/visualizza_risultati.py @@ -0,0 +1,487 @@ +import streamlit as st +import pandas as pd +import json +import plotly.express as px +import plotly.graph_objects as go + +from controllers.test_controller import ( + load_results, + import_results_from_file, +) +from controllers.question_set_controller import load_sets +from controllers.api_preset_controller import load_presets +from view.style_utils import add_page_header, add_section_title +from view.component_utils import create_card, create_metrics_container + +add_page_header( + "Visualizzazione Risultati Test", + icon="📊", + description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." +) + +# Carica sempre i risultati direttamente dal database +st.session_state.results = load_results() +if st.session_state.results.empty: + st.warning("Nessun risultato di test disponibile. Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") + st.stop() + +# Carica sempre i set di domande dal database +st.session_state.question_sets = load_sets() + +# Carica sempre i preset API dal database +st.session_state.api_presets = load_presets() + +# Stato per messaggi di importazione risultati +if 'import_results_success' not in st.session_state: + st.session_state.import_results_success = False +if 'import_results_error' not in st.session_state: + st.session_state.import_results_error = False +if 'import_results_message' not in st.session_state: + st.session_state.import_results_message = "" + +if st.session_state.import_results_success: + st.success(st.session_state.import_results_message) + st.session_state.import_results_success = False +if st.session_state.import_results_error: + st.error(st.session_state.import_results_message) + st.session_state.import_results_error = False + +def get_set_name(set_id): + if not st.session_state.question_sets.empty: + set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] + if not set_info.empty: + return set_info.iloc[0]['name'] + return "Set Sconosciuto" + +def get_model_from_preset_name(preset_name): + """Restituisce il modello associato a un preset, se disponibile.""" + if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: + preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] + if not preset_row.empty: + return preset_row.iloc[0]['model'] + return "Sconosciuto" + +def import_results_callback(): + """Callback per importare risultati da file JSON.""" + if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: + success, message = import_results_from_file(st.session_state.uploaded_results_file) + st.session_state.import_results_message = message + st.session_state.import_results_success = success + st.session_state.import_results_error = not success + if success: + st.session_state.results = load_results() + 
st.session_state.uploaded_results_file = None + +# Filtri per Set e Modello LLM +all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) +all_model_names = sorted({get_model_from_preset_name(r['results'].get('generation_preset')) for _, r in st.session_state.results.iterrows()}) + +selected_set_filter = st.selectbox( + "Filtra per Set", + options=["Tutti"] + all_set_names, + index=0, + key="filter_set_name" +) + +selected_model_filter = st.selectbox( + "Filtra per Modello LLM", + options=["Tutti"] + all_model_names, + index=0, + key="filter_model_name" +) + +filtered_results_df = st.session_state.results +if selected_set_filter != "Tutti": + set_ids = st.session_state.question_sets[st.session_state.question_sets['name'] == selected_set_filter]['id'].astype(str) + filtered_results_df = filtered_results_df[filtered_results_df['set_id'].astype(str).isin(set_ids)] + +if selected_model_filter != "Tutti": + filtered_results_df = filtered_results_df[ + filtered_results_df.apply( + lambda row: get_model_from_preset_name(row['results'].get('generation_preset')) == selected_model_filter, + axis=1 + ) + ] + +# Elabora i risultati per la visualizzazione nel selectbox +processed_results_for_select = [] +for _, row in filtered_results_df.iterrows(): + result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli + set_name = get_set_name(row['set_id']) + avg_score = result_data.get('avg_score', 0) + method = result_data.get('method', 'N/A') + method_icon = "🤖" if method == "LLM" else "📊" + + processed_results_for_select.append({ + 'id': row['id'], + 'display_name': f"{row['timestamp']} - {method_icon} {set_name} (Avg: {avg_score:.2f}%) - {method}" + }) + +processed_results_for_select.sort(key=lambda x: x['display_name'].split(' - ')[0], reverse=True) # Ordina per timestamp + +result_options = {r['id']: r['display_name'] for r in processed_results_for_select} + +# Seleziona il risultato da visualizzare +selected_result_id = st.selectbox( + "Seleziona un Risultato del Test da Visualizzare", + options=list(result_options.keys()), + format_func=lambda x: result_options[x], + index=0 if result_options else None, + key="select_test_result_to_view" +) + +# Opzionalmente seleziona un secondo risultato per il confronto +# Rimuove l'opzione del risultato attualmente selezionato per evitare di confrontare il test con se stesso +compare_options = [rid for rid in result_options.keys() if rid != selected_result_id] +compare_result_id = st.selectbox( + "Confronta con un altro risultato (opzionale)", + options=[None] + compare_options, + format_func=lambda x: "Nessun confronto" if x is None else result_options[x], + index=0, + key="select_test_result_compare" +) +if not selected_result_id: + st.info("Nessun risultato selezionato o disponibile.") + st.stop() + +# Ottieni i dati del risultato selezionato +selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] +result_data = selected_result_row['results'] +set_name = get_set_name(selected_result_row['set_id']) +questions_results = result_data.get('questions', {}) + +with st.expander("Esporta/Importa Risultati"): + col_exp, col_imp = st.columns(2) + with col_exp: + selected_json = json.dumps({ + 'id': selected_result_row['id'], + 'set_id': selected_result_row['set_id'], + 'timestamp': selected_result_row['timestamp'], + 'results': result_data + }, indent=2) + st.download_button( + "Export Risultato Selezionato", + selected_json, + 
file_name=f"result_{selected_result_row['id']}.json", + mime="application/json" + ) + + all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2) + st.download_button( + "Export Tutti i Risultati", + all_json, + file_name="all_results.json", + mime="application/json" + ) + + with col_imp: + uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results") + if uploaded_file is not None: + st.session_state.uploaded_results_file = uploaded_file + st.button( + "Importa Risultati", + on_click=import_results_callback, + key="import_results_btn" + ) + +# Carica eventuale risultato di confronto +compare_result_row = None +compare_result_data = None +compare_questions_results = {} +compare_set_name = "" +if compare_result_id: + compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] + compare_result_data = compare_result_row['results'] + compare_questions_results = compare_result_data.get('questions', {}) + compare_set_name = get_set_name(compare_result_row['set_id']) + +# Visualizza informazioni generali sul risultato +evaluation_method = result_data.get('method', 'LLM') +method_icon = "🤖" if evaluation_method == "LLM" else "📊" +method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto" + +add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄") +st.markdown(f"**ID Risultato:** `{selected_result_id}`") +st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}") +st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**") + +if 'generation_preset' in result_data: + st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") +if evaluation_method == "LLM" and 'evaluation_preset' in result_data: + st.markdown(f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`") + + +# Metriche Generali del Test +add_section_title("Metriche Generali del Test", icon="📈") + +if questions_results: + avg_score_overall = result_data.get('avg_score', 0) + num_questions = len(questions_results) + + cols_metrics = st.columns(2) + with cols_metrics[0]: + st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%") + with cols_metrics[1]: + st.metric("Numero di Domande Valutate", num_questions) + + if compare_result_row is not None: + compare_avg = compare_result_data.get('avg_score', 0) + diff_avg = compare_avg - avg_score_overall + st.markdown("### Confronto") + cols_cmp = st.columns(3) + cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%") + cols_cmp[1].metric(f"Punteggio Confronto", f"{compare_avg:.2f}%") + cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%") + + # Grafico a barre dei punteggi per domanda (mostra anche il risultato di confronto se presente) + scores_data = [] + all_q_ids = set(questions_results.keys()) + if compare_result_row is not None: + all_q_ids |= set(compare_questions_results.keys()) + + for i, q_id in enumerate(all_q_ids): + q1 = questions_results.get(q_id) + q2 = compare_questions_results.get(q_id) + label = "" + if q1: + label = q1.get('question', f'Domanda {i}') + elif q2: + label = q2.get('question', f'Domanda {i}') + label = label[:50] + "..." 
if len(label) > 50 else label + + if q1: + scores_data.append({'Domanda': label, 'Punteggio': q1.get('evaluation', {}).get('score', 0), 'Tipo': 'Selezionato'}) + if q2: + scores_data.append({'Domanda': label, 'Punteggio': q2.get('evaluation', {}).get('score', 0), 'Tipo': 'Confronto'}) + + if scores_data: + df_scores = pd.DataFrame(scores_data) + fig = px.bar(df_scores, x='Domanda', y='Punteggio', color='Tipo', barmode='group', + title="Punteggi per Domanda", height=max(400, len(all_q_ids) * 30)) + fig.update_layout(yaxis_range=[0, 100]) + st.plotly_chart(fig, use_container_width=True) + + # Grafico aggiuntivo solo per la modalità LLM + if evaluation_method == "LLM": + # Raccogliamo i dati di Somiglianza, Correttezza e Completezza per ogni domanda + radar_data = [] + metrics_sum = {'similarity': 0, 'correctness': 0, 'completeness': 0} + count = 0 + + for q_id, q_data in questions_results.items(): + evaluation = q_data.get('evaluation', {}) + question_text = q_data.get('question', f'Domanda {q_id}') + # Utilizziamo i primi 20 caratteri della domanda come etichetta + question_label = question_text[:20] + "..." if len(question_text) > 20 else question_text + + # Raccogliamo i dati per il grafico radar individuale + similarity = evaluation.get('similarity', 0) + correctness = evaluation.get('correctness', 0) + completeness = evaluation.get('completeness', 0) + + radar_data.append({ + 'Domanda': question_label, + 'Somiglianza': similarity, + 'Correttezza': correctness, + 'Completezza': completeness + }) + + # Sommiamo per calcolare le medie + metrics_sum['similarity'] += similarity + metrics_sum['correctness'] += correctness + metrics_sum['completeness'] += completeness + count += 1 + + # Calcoliamo le medie per il risultato selezionato + avg_metrics = { + 'similarity': metrics_sum['similarity'] / count if count > 0 else 0, + 'correctness': metrics_sum['correctness'] / count if count > 0 else 0, + 'completeness': metrics_sum['completeness'] / count if count > 0 else 0 + } + + # Se esiste un risultato di confronto calcoliamo anche le sue medie + avg_metrics_cmp = None + if compare_result_row is not None and compare_questions_results: + cmp_sum = {'similarity': 0, 'correctness': 0, 'completeness': 0} + cmp_count = 0 + for q_cmp in compare_questions_results.values(): + eval_cmp = q_cmp.get('evaluation', {}) + cmp_sum['similarity'] += eval_cmp.get('similarity', 0) + cmp_sum['correctness'] += eval_cmp.get('correctness', 0) + cmp_sum['completeness'] += eval_cmp.get('completeness', 0) + cmp_count += 1 + avg_metrics_cmp = { + 'similarity': cmp_sum['similarity'] / cmp_count if cmp_count > 0 else 0, + 'correctness': cmp_sum['correctness'] / cmp_count if cmp_count > 0 else 0, + 'completeness': cmp_sum['completeness'] / cmp_count if cmp_count > 0 else 0 + } + + # Creiamo un DataFrame con i dati + df_radar = pd.DataFrame(radar_data) + + # Prima mostriamo il radar chart per ogni domanda + categories = ['Somiglianza', 'Correttezza', 'Completezza'] + + # Creiamo il grafico radar + fig_radar = go.Figure() + + # Aggiungiamo una traccia per ogni domanda del risultato selezionato + for i, row in df_radar.iterrows(): + fig_radar.add_trace(go.Scatterpolar( + r=[row['Somiglianza'], row['Correttezza'], row['Completezza']], + theta=categories, + fill='toself', + name=row['Domanda'] + )) + + # Traccia media risultato selezionato + fig_radar.add_trace(go.Scatterpolar( + r=[avg_metrics['similarity'], avg_metrics['correctness'], avg_metrics['completeness']], + theta=categories, + fill='toself', + name='Media', + 
line=dict(color='red', width=3) + )) + + # Traccia media confronto, se disponibile + if avg_metrics_cmp is not None: + fig_radar.add_trace(go.Scatterpolar( + r=[avg_metrics_cmp['similarity'], avg_metrics_cmp['correctness'], avg_metrics_cmp['completeness']], + theta=categories, + fill='toself', + name='Media Confronto', + line=dict(color='green', width=3, dash='dash') + )) + + # Configuriamo il layout del grafico radar + fig_radar.update_layout( + title="Grafico Radar delle Metriche LLM per ogni domanda", + polar=dict( + radialaxis=dict( + visible=True, + range=[0, 100] + ) + ), + showlegend=True, + legend=dict( + orientation="h", + yanchor="bottom", + y=-0.2, + xanchor="center", + x=0.5 + ), + height=600 + ) + + # Mostriamo il grafico radar + st.plotly_chart(fig_radar, use_container_width=True) + + # Mostriamo anche i valori medi in un blocco di metriche per maggiore chiarezza + st.subheader("Valori medi delle metriche") + cols = st.columns(3) + cols[0].metric("Somiglianza", f"{avg_metrics['similarity']:.2f}%") + cols[1].metric("Correttezza", f"{avg_metrics['correctness']:.2f}%") + cols[2].metric("Completezza", f"{avg_metrics['completeness']:.2f}%") + + if avg_metrics_cmp is not None: + cols_cmp = st.columns(3) + cols_cmp[0].metric("Somiglianza (Confronto)", f"{avg_metrics_cmp['similarity']:.2f}%") + cols_cmp[1].metric("Correttezza (Confronto)", f"{avg_metrics_cmp['correctness']:.2f}%") + cols_cmp[2].metric("Completezza (Confronto)", f"{avg_metrics_cmp['completeness']:.2f}%") +else: + st.info("Nessun dettaglio per le domande disponibile in questo risultato.") + +if compare_result_row is not None: + add_section_title("Confronto Dettagliato per Domanda", icon="🔍") + comparison_rows = [] + all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys()) + for qid in all_q_ids: + q1 = questions_results.get(qid, {}) + q2 = compare_questions_results.get(qid, {}) + label = q1.get('question') or q2.get('question') or str(qid) + score1 = q1.get('evaluation', {}).get('score', None) + score2 = q2.get('evaluation', {}).get('score', None) + delta = None + if score1 is not None and score2 is not None: + delta = score2 - score1 + comparison_rows.append({ + 'Domanda': label[:50] + ('...' if len(label) > 50 else ''), + 'Selezionato': score1, + 'Confronto': score2, + 'Delta': delta + }) + if comparison_rows: + df_comp = pd.DataFrame(comparison_rows) + st.dataframe(df_comp) + +# Dettagli per ogni domanda +add_section_title("Risultati Dettagliati per Domanda", icon="📝") +if not questions_results: + st.info("Nessuna domanda trovata in questo set di risultati.") +else: + for q_id, q_data in questions_results.items(): + question_text = q_data.get('question', "Testo domanda non disponibile") + expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile") + actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile") + + with st.expander( + f"Domanda: {question_text[:100]}..." 
+ ): + st.markdown(f"**Domanda:** {question_text}") + st.markdown(f"**Risposta Attesa:** {expected_answer}") + st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") + st.divider() + + # Mostra Dettagli API di Generazione (se presenti e richiesti) + generation_api_details = q_data.get('generation_api_details') + if generation_api_details and isinstance(generation_api_details, dict): + with st.container(): + st.markdown("###### Dettagli Chiamata API di Generazione Risposta") + if generation_api_details.get('request'): + st.caption("Richiesta API Generazione:") + st.json(generation_api_details['request'], expanded=False) + if generation_api_details.get('response_content'): + st.caption("Contenuto Risposta API Generazione:") + # Prova a formattare se è una stringa JSON, altrimenti mostra com'è + try: + response_data_gen = json.loads(generation_api_details['response_content']) if isinstance(generation_api_details['response_content'], str) else generation_api_details['response_content'] + st.code(json.dumps(response_data_gen, indent=2), language="json") + except: + st.text(generation_api_details['response_content']) + if generation_api_details.get('error'): + st.caption("Errore API Generazione:") + st.error(generation_api_details['error']) + st.divider() + + if evaluation_method == "LLM": + evaluation = q_data.get('evaluation', {}) # Assicurati che evaluation sia sempre un dizionario + st.markdown(f"##### Valutazione LLM") + score = evaluation.get('score', 0) + explanation = evaluation.get('explanation', "Nessuna spiegazione.") + similarity = evaluation.get('similarity', 0) + correctness = evaluation.get('correctness', 0) + completeness = evaluation.get('completeness', 0) + + st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") + st.markdown(f"**Spiegazione:** {explanation}") + + cols_eval_metrics = st.columns(3) + cols_eval_metrics[0].metric("Somiglianza", f"{similarity:.2f}%") + cols_eval_metrics[1].metric("Correttezza", f"{correctness:.2f}%") + cols_eval_metrics[2].metric("Completezza", f"{completeness:.2f}%") + + api_details = evaluation.get('api_details') + if api_details and isinstance(api_details, dict): + with st.container(): # Sostituisce l'expander interno + st.markdown("###### Dettagli Chiamata API di Valutazione") + if api_details.get('request'): + st.caption("Richiesta API:") + st.json(api_details['request'], expanded=False) + if api_details.get('response_content'): + st.caption("Contenuto Risposta API:") + st.code(json.dumps(json.loads(api_details['response_content']), indent=2) if isinstance(api_details['response_content'], str) else json.dumps(api_details['response_content'], indent=2), language="json") + if api_details.get('error'): + st.caption("Errore API:") + st.error(api_details['error']) + + st.markdown("--- --- ---") From 42d7930c516322348102da5808db21bd49135bef Mon Sep 17 00:00:00 2001 From: oniichan Date: Sat, 2 Aug 2025 22:00:46 +0200 Subject: [PATCH 02/41] changed project structure, add automatic test and much more .. 
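
This refactor splits the app into view / controller / service / model layers:
each page in view/ now exposes a render() entry point dispatched from app.py,
business logic moves into a new services/ package, the raw-SQL model code is
replaced by SQLAlchemy ORM classes (models/orm_models.py), and the tables read
on every rerun are served from an lru_cache-backed services/cache.py. CI also
runs flake8, mypy and pytest with coverage. A minimal sketch of the intended
read-through-cache / invalidate-on-write pattern (illustrative only, not part
of the diff below):

    # controllers read cached DataFrames and refresh the cache after each write
    from models.question import Question
    from services.cache import get_questions, refresh_questions

    def delete_question(question_id: str) -> None:
        Question.delete(question_id)   # write goes through the ORM model
        refresh_questions()            # cache_clear() + reload from the database

    questions_df = get_questions()     # cached pandas DataFrame between writes
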
--- .flake8 | 3 + .github/workflows/ci.yml | 8 +- app.py | 330 +--------- controllers/api_preset_controller.py | 101 ++- controllers/db_controller.py | 6 + controllers/openai_controller.py | 157 ++++- controllers/question_controller.py | 62 +- controllers/question_set_controller.py | 193 +----- controllers/startup_controller.py | 28 + controllers/test_controller.py | 156 ++++- db.config.example | 2 +- initialize_db.py | 1 - logging_config.py | 3 +- models/api_preset.py | 70 +- models/cached_data.py | 20 + models/db_utils.py | 69 +- models/orm_models.py | 58 ++ models/question.py | 96 +-- models/question_set.py | 67 +- models/test_result.py | 74 ++- pyproject.toml | 8 + requirements-dev.txt | 2 + requirements.txt | 5 +- services/__init__.py | 0 services/cache.py | 47 ++ services/evaluation_service.py | 147 +++++ services/openai_service.py | 93 +++ services/question_service.py | 50 ++ services/question_set_importer.py | 256 ++++++++ tests/test_evaluation_service.py | 88 +++ tests/test_openai_controller.py | 112 ++++ tests/test_question_controller.py | 44 +- tests/test_question_service.py | 40 ++ tests/test_question_set_importer.py | 90 +++ tests/test_statistics.py | 43 ++ view/__init__.py | 1 + view/api_configurazione.py | 402 ++++++------ view/component_utils.py | 1 - view/esecuzione_test.py | 325 ++++------ view/gestione_domande.py | 499 +++++++------- view/gestione_set.py | 729 +++++++++++---------- view/home.py | 96 +++ view/session_state.py | 20 +- view/set_helpers.py | 93 +-- view/state_models.py | 47 ++ view/style_utils.py | 189 +++++- view/visualizza_risultati.py | 866 ++++++++++++------------- 47 files changed, 3623 insertions(+), 2174 deletions(-) create mode 100644 .flake8 create mode 100644 controllers/db_controller.py create mode 100644 controllers/startup_controller.py create mode 100644 models/cached_data.py create mode 100644 models/orm_models.py create mode 100644 pyproject.toml create mode 100644 requirements-dev.txt create mode 100644 services/__init__.py create mode 100644 services/cache.py create mode 100644 services/evaluation_service.py create mode 100644 services/openai_service.py create mode 100644 services/question_service.py create mode 100644 services/question_set_importer.py create mode 100644 tests/test_evaluation_service.py create mode 100644 tests/test_openai_controller.py create mode 100644 tests/test_question_service.py create mode 100644 tests/test_question_set_importer.py create mode 100644 tests/test_statistics.py create mode 100644 view/__init__.py create mode 100644 view/home.py create mode 100644 view/state_models.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..514af5b --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 120 +extend-ignore = E203,W503 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5c588f..d474a57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,8 +31,12 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install pytest + pip install -r requirements-dev.txt - name: Configure database for tests run: sed -i 's/host=db/host=127.0.0.1/' db.config.example + - name: Run linters + run: | + flake8 app.py controllers/ models/ view/ + mypy app.py controllers/ models/ view/ - name: Run tests - run: pytest -v + run: pytest --cov=controllers --cov=models diff --git a/app.py b/app.py index 91f5e3b..6145674 100644 --- a/app.py +++ b/app.py @@ -1,18 +1,22 @@ -import streamlit as st -import os -import importlib -import sys 
import logging + +import streamlit as st + +from view import ( + api_configurazione, + esecuzione_test, + gestione_domande, + gestione_set, + home, + visualizza_risultati, +) +from view.session_state import initialize_session_state +from view.style_utils import add_global_styles from logging_config import setup_logging setup_logging() logging.info("Applicazione avviata") -from models.db_utils import init_db -from controllers.question_controller import load_questions -from controllers.question_set_controller import load_sets -from controllers.test_controller import load_results - # Imposta la configurazione della pagina st.set_page_config( page_title="LLM Test Evaluation Platform", @@ -20,314 +24,24 @@ layout="wide", initial_sidebar_state="expanded" ) - -# Inizializza lo stato della sessione -if 'initialized' not in st.session_state: - st.session_state.initialized = False - -# Inizializza i file di dati se non esistono -if not st.session_state.initialized: - init_db() - st.session_state.initialized = True - -# Carica i dati nello stato della sessione se non sono già caricati -if 'questions' not in st.session_state: - st.session_state.questions = load_questions() - -if 'question_sets' not in st.session_state: - st.session_state.question_sets = load_sets() - -if 'results' not in st.session_state: - st.session_state.results = load_results() - -# Configurazione API -if 'api_key' not in st.session_state: - st.session_state.api_key = os.environ.get('OPENAI_API_KEY', '') - -if 'endpoint' not in st.session_state: - st.session_state.endpoint = 'https://api.openai.com/v1' - -if 'model' not in st.session_state: - st.session_state.model = 'gpt-4o' - -if 'temperature' not in st.session_state: - st.session_state.temperature = 0.0 - -if 'max_tokens' not in st.session_state: - st.session_state.max_tokens = 1000 +initialize_session_state() # Applicazione principale st.title("🧠 LLM Test Evaluation Platform - Artificial QI") -# Importa utilità UI -from view.style_utils import add_global_styles, add_page_header - # Aggiungi CSS personalizzato e stili globali add_global_styles() -# Definisce le pagine disponibili e il menu laterale PAGES = { - "Home": None, - "Configurazione API": "view.api_configurazione", - "Gestione Domande": "view.gestione_domande", - "Gestione Set di Domande": "view.gestione_set", - "Esecuzione Test": "view.esecuzione_test", - "Visualizzazione Risultati": "view.visualizza_risultati", + "Home": home.render, + "Configurazione API": api_configurazione.render, + "Gestione Domande": gestione_domande.render, + "Gestione Set di Domande": gestione_set.render, + "Esecuzione Test": esecuzione_test.render, + "Visualizzazione Risultati": visualizza_risultati.render, } selected_page = st.sidebar.radio("Navigazione", list(PAGES.keys())) - -# CSS Estremo per Visibilità Input in Tema Scuro -st.markdown(""" - -""", unsafe_allow_html=True) - - -def show_home_page(): - """Visualizza la pagina principale con le funzionalità della piattaforma.""" - - st.markdown( - """ -
- [HTML markup of the deleted blocks is not recoverable; only their text content follows]
- 🧠 Piattaforma di Valutazione LLM
- Una piattaforma completa per valutare le risposte LLM con diversi provider AI
- """,
- unsafe_allow_html=True,
- )
- 
- # Box delle funzionalità con icone e stile migliorato
- col1, col2 = st.columns(2)
- 
- with col1:
- st.markdown(
- """
- 📋 Gestione delle Domande
- Crea, modifica e organizza le tue domande di test con le risposte previste.
- Costruisci set di test completi per valutare le risposte LLM in modo efficiente.
- 
- 🔌 Supporto Multi-Provider API
- Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello.
- Configura parametri API e verifica le connessioni con feedback in tempo reale.
- """,
- unsafe_allow_html=True,
- )
- 
- with col2:
- st.markdown(
- """
- 🧪 Valutazione Automatizzata
- Esegui test con punteggio automatico rispetto alle risposte previste.
- Valuta la somiglianza semantica tra testi con modelli linguistici.
- 
- 📊 Analisi Avanzata
- Visualizza i risultati dei test con grafici interattivi e metriche dettagliate.
- Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici.
- """,
- unsafe_allow_html=True,
- )
- 
- st.markdown(
- """
- 🚀 Iniziare
- 1. Configura le tue credenziali API nella pagina Configurazione API
- 2. Crea domande e risposte previste nella pagina Gestione Domande
- 3. Organizza le domande in set nella pagina Gestione Set di Domande
- 4. Esegui valutazioni nella pagina Esecuzione Test
- 5. Visualizza e analizza i risultati nella pagina Visualizzazione Risultati
- 
- Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.
-""", - unsafe_allow_html=True, - ) - - -if selected_page == "Home": - show_home_page() -else: - module_name = PAGES[selected_page] - if module_name in sys.modules: - importlib.reload(sys.modules[module_name]) - else: - importlib.import_module(module_name) +render_page = PAGES[selected_page] +render_page() diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py index a153660..c77a741 100644 --- a/controllers/api_preset_controller.py +++ b/controllers/api_preset_controller.py @@ -1,14 +1,109 @@ +"""Business logic per la gestione dei preset API.""" + +import uuid +from typing import List, Optional, Tuple + import pandas as pd + from models.api_preset import APIPreset +from services.cache import ( + get_api_presets as _get_api_presets, + refresh_api_presets as _refresh_api_presets, +) +from controllers.startup_controller import ( + get_default_api_settings as _startup_get_default_api_settings, +) +DEFAULT_API_SETTINGS = _startup_get_default_api_settings() +DEFAULT_MODEL = DEFAULT_API_SETTINGS["model"] +DEFAULT_ENDPOINT = DEFAULT_API_SETTINGS["endpoint"] + + +def get_default_api_settings() -> dict: + """Restituisce l'endpoint e il modello API predefiniti.""" + return DEFAULT_API_SETTINGS.copy() def load_presets() -> pd.DataFrame: - return APIPreset.load_all() + """Restituisce i preset API utilizzando la cache.""" + return _get_api_presets() + + +def refresh_api_presets() -> pd.DataFrame: + """Svuota e ricarica la cache dei preset API.""" + return _refresh_api_presets() + + +def list_presets(df: pd.DataFrame | None = None) -> List[dict]: + """Restituisce l'elenco dei preset come lista di dizionari.""" + if df is None: + df = load_presets() + return df.to_dict(orient="records") -def save_presets(df: pd.DataFrame) -> None: +def get_preset_by_id(preset_id: str, df: pd.DataFrame | None = None) -> Optional[dict]: + """Recupera un singolo preset dato il suo ID.""" + if df is None: + df = load_presets() + match = df[df["id"] == preset_id] + if match.empty: + return None + return match.iloc[0].to_dict() + + +def validate_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, str]: + """Valida i dati di un preset prima del salvataggio.""" + name = data.get("name", "").strip() + if not name: + return False, "Il nome del preset non può essere vuoto." + + df = load_presets() + if preset_id: + df = df[df["id"] != preset_id] + if name in df["name"].values: + return False, f"Un preset con nome '{name}' esiste già." + return True, "" + + +def save_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, str, pd.DataFrame]: + """Salva un nuovo preset o aggiorna uno esistente.""" + is_valid, message = validate_preset(data, preset_id) + if not is_valid: + return False, message, load_presets() + + df = load_presets() + preset_data = { + "name": data.get("name"), + "endpoint": data.get("endpoint"), + "api_key": data.get("api_key"), + "model": data.get("model"), + "temperature": float(data.get("temperature", 0.0)), + "max_tokens": int(data.get("max_tokens", 1000)), + } + + if preset_id: + idx = df.index[df["id"] == preset_id] + if not idx.empty: + for key, value in preset_data.items(): + df.loc[idx[0], key] = value + success_message = f"Preset '{preset_data['name']}' aggiornato con successo!" + else: + preset_data["id"] = str(uuid.uuid4()) + df = pd.concat([df, pd.DataFrame([preset_data])], ignore_index=True) + success_message = f"Preset '{preset_data['name']}' creato con successo!" 
+ APIPreset.save_df(df) + updated_df = refresh_api_presets() + return True, success_message, updated_df + +def delete_preset(preset_id: str) -> Tuple[bool, str, pd.DataFrame]: + """Elimina un preset e ritorna lo stato aggiornato.""" + df = load_presets() + match = df[df["id"] == preset_id] + if match.empty: + return False, "Preset non trovato.", df -def delete_preset(preset_id: str) -> None: + preset_name = match.iloc[0]["name"] APIPreset.delete(preset_id) + updated_df = refresh_api_presets() + return True, f"Preset '{preset_name}' eliminato.", updated_df diff --git a/controllers/db_controller.py b/controllers/db_controller.py new file mode 100644 index 0000000..a644818 --- /dev/null +++ b/controllers/db_controller.py @@ -0,0 +1,6 @@ +from models.db_utils import init_db + + +def initialize_database(): + """Inizializza il database creando le tabelle necessarie.""" + init_db() diff --git a/controllers/openai_controller.py b/controllers/openai_controller.py index 42318c7..578474d 100644 --- a/controllers/openai_controller.py +++ b/controllers/openai_controller.py @@ -1,24 +1,147 @@ -from models.openai_service import ( - evaluate_answer as _evaluate_answer, - generate_example_answer_with_llm as _generate_example_answer_with_llm, - test_api_connection as _test_api_connection, - DEFAULT_MODEL, - DEFAULT_ENDPOINT, -) +import logging +from openai import APIConnectionError, RateLimitError, APIStatusError +from services import evaluation_service, openai_service -def evaluate_answer(question: str, expected_answer: str, actual_answer: str, - client_config: dict, show_api_details: bool = False): - return _evaluate_answer(question, expected_answer, actual_answer, - client_config, show_api_details) +__all__ = [ + "evaluate_answer", + "generate_example_answer_with_llm", + "test_api_connection", +] -def generate_example_answer_with_llm(question: str, client_config: dict, - show_api_details: bool = False): - return _generate_example_answer_with_llm(question, client_config, show_api_details) +def evaluate_answer( + question: str, + expected_answer: str, + actual_answer: str, + client_config: dict, + show_api_details: bool = False, +): + """Delega la valutazione della risposta a services.evaluation_service.""" + return evaluation_service.evaluate_answer( + question, expected_answer, actual_answer, client_config, show_api_details + ) -def test_api_connection(api_key: str, endpoint: str, model: str, - temperature: float, max_tokens: int): - return _test_api_connection(api_key, endpoint, model, temperature, max_tokens) +def generate_example_answer_with_llm( + question: str, client_config: dict, show_api_details: bool = False +): + """Genera una risposta di esempio per una domanda utilizzando un LLM.""" + client = openai_service.get_openai_client( + api_key=client_config.get("api_key"), + base_url=client_config.get("endpoint"), + ) + if not client: + logging.error("Client API per la generazione risposte non configurato.") + return { + "answer": None, + "api_details": {"error": "Client API non configurato"} + if show_api_details + else None, + } + if question is None or not isinstance(question, str) or question.strip() == "": + logging.error("La domanda fornita \u00e8 vuota o non valida.") + return { + "answer": None, + "api_details": {"error": "Domanda vuota o non valida"} + if show_api_details + else None, + } + + prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" + + api_request_details = { + "model": client_config.get("model", openai_service.DEFAULT_MODEL), + "messages": 
[{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.7), + "max_tokens": client_config.get("max_tokens", 500), + } + + api_details_for_log = {} + if show_api_details: + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + answer = ( + response.choices[0].message.content.strip() + if response.choices and response.choices[0].message.content + else None + ) + if show_api_details: + api_details_for_log["response_content"] = ( + response.choices[0].message.content + if response.choices + else "Nessun contenuto" + ) + return { + "answer": answer, + "api_details": api_details_for_log if show_api_details else None, + } + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error( + f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" + ) + if show_api_details: + api_details_for_log["error"] = str(e) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else None, + } + except Exception as exc: + logging.error( + f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" + ) + if show_api_details: + api_details_for_log["error"] = str(exc) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else None, + } + + +def test_api_connection( + api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int +): + """Testa la connessione all'API LLM con i parametri forniti.""" + client = openai_service.get_openai_client(api_key=api_key, base_url=endpoint) + if not client: + return False, "Client API non inizializzato. Controlla chiave API e endpoint." + + try: + response = client.chat.completions.create( + model=model, + messages=[ + { + "role": "user", + "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'", + } + ], + temperature=temperature, + max_tokens=max_tokens, + ) + content = response.choices[0].message.content or "" + if "Connessione riuscita." in content: + return True, "Connessione API riuscita!" + else: + return ( + False, + "Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): " + f"{content[:200]}...", + ) + except APIConnectionError as e: + return False, f"Errore di connessione API: {e}" + except RateLimitError as e: + return False, f"Errore di Rate Limit API: {e}" + except APIStatusError as e: + return ( + False, + "Errore di stato API (es. 
modello '{model}' non valido per l'endpoint '{endpoint}', " + f"autenticazione fallita, quota superata): {e.status_code} - {e.message}", + ) + except Exception as exc: + return False, ( + f"Errore imprevisto durante il test della connessione: {type(exc).__name__} - {exc}" + ) diff --git a/controllers/question_controller.py b/controllers/question_controller.py index 0f70466..3ffc676 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,33 +1,57 @@ -from typing import Optional, Tuple -import pandas as pd import os import json import uuid +from typing import Optional, Tuple, List + +import pandas as pd + from models.question import Question +from services.question_service import load_questions, refresh_questions + + +def filter_questions_by_category(category: Optional[str] = None) -> Tuple[pd.DataFrame, List[str]]: + """Ritorna le domande filtrate per categoria e la lista delle categorie disponibili.""" + df = load_questions() + if df.empty: + return df, [] -def load_questions() -> pd.DataFrame: - return Question.load_all() + if "categoria" not in df.columns: + df["categoria"] = "" + else: + df["categoria"] = df["categoria"].fillna("N/A") + + categories = sorted(list(df["categoria"].astype(str).unique())) + + if category: + filtered_df = df[df["categoria"] == category] + else: + filtered_df = df + + return filtered_df, categories def add_question(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: - return Question.add(domanda, risposta_attesa, categoria, question_id) + qid = Question.add(domanda, risposta_attesa, categoria, question_id) + refresh_questions() + return qid -def update_question(question_id: str, domanda: Optional[str] = None, risposta_attesa: Optional[str] = None, categoria: Optional[str] = None) -> None: - Question.update(question_id, domanda, risposta_attesa, categoria) +def update_question( + question_id: str, + domanda: Optional[str] = None, + risposta_attesa: Optional[str] = None, + categoria: Optional[str] = None, +) -> bool: + """Aggiorna una domanda e restituisce l'esito dell'operazione.""" + updated = Question.update(question_id, domanda, risposta_attesa, categoria) + refresh_questions() + return updated def delete_question(question_id: str) -> None: Question.delete(question_id) - - -def add_question_if_not_exists(question_id: str, domanda: str, risposta_attesa: str, categoria: str = "") -> bool: - df = Question.load_all() - if str(question_id) in df['id'].astype(str).values: - return False - Question.add(domanda, risposta_attesa, categoria, question_id) - return True + refresh_questions() def import_questions_from_file(file) -> Tuple[bool, str]: @@ -59,7 +83,11 @@ def import_questions_from_file(file) -> Tuple[bool, str]: required_columns = ['domanda', 'risposta_attesa'] if not all(col in imported_df.columns for col in required_columns): - return False, f"Il file importato deve contenere le colonne '{required_columns[0]}' e '{required_columns[1]}'." 
+ return ( + False, + f"Il file importato deve contenere le colonne '{required_columns[0]}' " + f"e '{required_columns[1]}'.", + ) if 'id' not in imported_df.columns: imported_df['id'] = [str(uuid.uuid4()) for _ in range(len(imported_df))] @@ -80,7 +108,7 @@ def import_questions_from_file(file) -> Tuple[bool, str]: for _, row in final_imported_df.iterrows(): Question.add(row['domanda'], row['risposta_attesa'], row['categoria'], question_id=row['id']) added_count += 1 - + refresh_questions() return True, f"Importate con successo {added_count} domande." except Exception as e: return False, f"Errore durante l'importazione delle domande: {str(e)}" diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 413e6ab..0d364d2 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -3,26 +3,38 @@ import json import os from models.question_set import QuestionSet -from controllers.question_controller import ( - add_question_if_not_exists, - load_questions, +from services.question_service import load_questions +from services.cache import ( + get_question_sets as _get_question_sets, + refresh_question_sets as _refresh_question_sets, ) +from services.question_set_importer import parse_input, persist_sets def load_sets() -> pd.DataFrame: - return QuestionSet.load_all() + """Restituisce tutti i set di domande utilizzando la cache.""" + return _get_question_sets() + + +def refresh_question_sets() -> pd.DataFrame: + """Svuota e ricarica la cache dei set di domande.""" + return _refresh_question_sets() def create_set(name: str, question_ids: Optional[List[str]] = None) -> str: - return QuestionSet.create(name, question_ids) + set_id = QuestionSet.create(name, question_ids) + _refresh_question_sets() + return set_id def update_set(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: QuestionSet.update(set_id, name, question_ids) + _refresh_question_sets() def delete_set(set_id: str) -> None: QuestionSet.delete(set_id) + _refresh_question_sets() def import_sets_from_file(uploaded_file) -> Dict[str, Any]: @@ -41,164 +53,21 @@ def import_sets_from_file(uploaded_file) -> Dict[str, Any]: return result try: - file_extension = os.path.splitext(uploaded_file.name)[1].lower() - - if file_extension == ".csv": - df = pd.read_csv(uploaded_file) - required_cols = ["name", "id", "domanda", "risposta_attesa", "categoria"] - missing = [c for c in required_cols if c not in df.columns] - if missing: - raise ValueError( - "Il file CSV deve contenere le colonne " + ", ".join(required_cols) - ) - - sets_dict: Dict[str, List[Dict[str, str]]] = {} - for _, row in df.iterrows(): - name = str(row["name"]).strip() - if not name: - continue - question = { - "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", - "domanda": str(row["domanda"]).strip() if not pd.isna(row["domanda"]) else "", - "risposta_attesa": str(row["risposta_attesa"]).strip() if not pd.isna(row["risposta_attesa"]) else "", - "categoria": str(row["categoria"]).strip() if not pd.isna(row["categoria"]) else "", - } - sets_dict.setdefault(name, []).append(question) - - data = [{"name": n, "questions": qs} for n, qs in sets_dict.items()] - else: - string_data = uploaded_file.getvalue().decode("utf-8") - data = json.loads(string_data) - + data = parse_input(uploaded_file) current_questions = load_questions() current_sets = load_sets() - - if not isinstance(data, list): - result["error_message"] = ( - "Formato JSON non valido. 
Il file deve contenere una lista (array) di set." - ) - return result - - sets_imported_count = 0 - new_questions_added_count = 0 - existing_questions_found_count = 0 - - for set_idx, set_data in enumerate(data): - if not isinstance(set_data, dict): - result["warnings"].append( - f"Elemento #{set_idx+1} nella lista non è un set valido (saltato)." - ) - continue - - set_name = set_data.get("name") - questions_in_set_data = set_data.get("questions", []) - - if not set_name or not isinstance(set_name, str) or not set_name.strip(): - result["warnings"].append( - f"Set #{set_idx+1} con nome mancante o non valido (saltato)." - ) - continue - - if not isinstance(questions_in_set_data, list): - result["warnings"].append( - f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." - ) - continue - - if set_name in current_sets["name"].values: - result["warnings"].append( - f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." - ) - continue - - current_set_question_ids: List[str] = [] - - for q_idx, q_data in enumerate(questions_in_set_data): - if isinstance(q_data, dict): - q_id = str(q_data.get("id", "")) - q_text = q_data.get("domanda", "") - q_answer = q_data.get("risposta_attesa", "") - q_category = q_data.get("categoria", "") - else: - q_id = str(q_data) - q_text = "" - q_answer = "" - q_category = "" - - if not q_id: - result["warnings"].append( - f"Domanda #{q_idx+1} nel set '{set_name}' senza ID (saltata)." - ) - continue - - if q_text and q_answer: - if q_id in current_questions["id"].astype(str).values: - existing_questions_found_count += 1 - current_set_question_ids.append(q_id) - else: - was_added = add_question_if_not_exists( - question_id=q_id, - domanda=q_text, - risposta_attesa=q_answer, - categoria=q_category, - ) - if was_added: - new_questions_added_count += 1 - current_set_question_ids.append(q_id) - new_row = pd.DataFrame( - { - "id": [q_id], - "domanda": [q_text], - "risposta_attesa": [q_answer], - "categoria": [q_category], - } - ) - current_questions = pd.concat( - [current_questions, new_row], ignore_index=True - ) - else: - existing_questions_found_count += 1 - current_set_question_ids.append(q_id) - continue - else: - if q_id in current_questions["id"].astype(str).values: - existing_questions_found_count += 1 - current_set_question_ids.append(q_id) - else: - result["warnings"].append( - f"Domanda #{q_idx+1} con ID {q_id} nel set '{set_name}' non trovata e senza dettagli; saltata." - ) - - if current_set_question_ids or len(questions_in_set_data) == 0: - try: - create_set(set_name, current_set_question_ids) - sets_imported_count += 1 - except Exception as e: - result["warnings"].append( - f"Errore durante la creazione del set '{set_name}': {e}" - ) - else: - result["warnings"].append( - f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." - ) - - result["questions_df"] = load_questions() - result["sets_df"] = load_sets() - - if sets_imported_count > 0 or new_questions_added_count > 0: - success_parts = [] - if sets_imported_count > 0: - success_parts.append(f"{sets_imported_count} set importati") - if new_questions_added_count > 0: - success_parts.append(f"{new_questions_added_count} nuove domande aggiunte") - if existing_questions_found_count > 0: - success_parts.append( - f"{existing_questions_found_count} domande esistenti referenziate" - ) - - result["success"] = True - result["success_message"] = ". ".join(success_parts) + "." 
- else: + persist_result = persist_sets(data, current_questions, current_sets) + + result.update( + { + "success": persist_result["success"], + "success_message": persist_result["success_message"], + "questions_df": persist_result["questions_df"], + "sets_df": persist_result["sets_df"], + "warnings": persist_result["warnings"], + } + ) + if not persist_result["success"]: result["error_message"] = ( "Nessun set o domanda valida trovata nel file per l'importazione." ) @@ -206,6 +75,8 @@ def import_sets_from_file(uploaded_file) -> Dict[str, Any]: result["error_message"] = ( "Errore di decodifica JSON. Assicurati che il file sia un JSON valido." ) + except ValueError as e: + result["error_message"] = str(e) except Exception as e: result["error_message"] = f"Errore imprevisto durante l'importazione: {str(e)}" diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py new file mode 100644 index 0000000..f45f166 --- /dev/null +++ b/controllers/startup_controller.py @@ -0,0 +1,28 @@ +import os + +from controllers.db_controller import initialize_database +from services.question_service import load_questions +from controllers.question_set_controller import load_sets +from controllers.test_controller import load_results +from services.openai_service import DEFAULT_MODEL, DEFAULT_ENDPOINT + + +def get_default_api_settings() -> dict: + """Restituisce l'endpoint e il modello API predefiniti.""" + return {"model": DEFAULT_MODEL, "endpoint": DEFAULT_ENDPOINT} + + +def get_initial_state() -> dict: + """Inizializza il database e restituisce lo stato di default dell'applicazione.""" + initialize_database() + defaults = get_default_api_settings() + return { + "questions": load_questions(), + "question_sets": load_sets(), + "results": load_results(), + "api_key": os.environ.get("OPENAI_API_KEY", ""), + "endpoint": defaults["endpoint"], + "model": defaults["model"], + "temperature": 0.0, + "max_tokens": 1000, + } diff --git a/controllers/test_controller.py b/controllers/test_controller.py index 05e91c6..86935af 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -1,21 +1,39 @@ import pandas as pd -from typing import Dict, Tuple +from typing import Dict, Tuple, List import json import uuid from datetime import datetime from models.test_result import TestResult +from services.cache import ( + get_results as _get_results, + refresh_results as _refresh_results, +) +from controllers.openai_controller import ( + evaluate_answer, + generate_example_answer_with_llm, +) +from services.question_service import load_questions def load_results() -> pd.DataFrame: - return TestResult.load_all() + """Restituisce i risultati dei test utilizzando la cache.""" + return _get_results() + + +def refresh_results() -> pd.DataFrame: + """Svuota e ricarica la cache dei risultati dei test.""" + return _refresh_results() def add_result(set_id: str, results_data: Dict) -> str: - return TestResult.add(set_id, results_data) + rid = TestResult.add(set_id, results_data) + _refresh_results() + return rid def save_results(df: pd.DataFrame) -> None: TestResult.save_df(df) + _refresh_results() def import_results_from_file(file) -> Tuple[bool, str]: @@ -61,3 +79,135 @@ def import_results_from_file(file) -> Tuple[bool, str]: return True, message except Exception as e: return False, f"Errore durante l'importazione dei risultati: {str(e)}" + + +def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: + """Calcola statistiche dai risultati grezzi delle domande.""" + if not 
questions_results: + return { + "avg_score": 0, + "per_question_scores": [], + "radar_metrics": { + "similarity": 0, + "correctness": 0, + "completeness": 0, + }, + } + + per_question_scores: List[Dict] = [] + radar_sums = {"similarity": 0, "correctness": 0, "completeness": 0} + + for qid, qdata in questions_results.items(): + evaluation = qdata.get("evaluation", {}) + score = evaluation.get("score", 0) + per_question_scores.append({ + "question": qdata.get("question", f"Domanda {qid}"), + "score": score, + }) + for metric in radar_sums.keys(): + radar_sums[metric] += evaluation.get(metric, 0) + + count = len(per_question_scores) + avg_score = ( + sum(item["score"] for item in per_question_scores) / count if count > 0 else 0 + ) + radar_metrics = { + metric: radar_sums[metric] / count if count > 0 else 0 for metric in radar_sums + } + + return { + "avg_score": avg_score, + "per_question_scores": per_question_scores, + "radar_metrics": radar_metrics, + } + + +def execute_llm_test( + set_id: str, + set_name: str, + question_ids: List[str], + gen_preset_config: Dict, + eval_preset_config: Dict, + show_api_details: bool = False, +) -> Dict: + """ + Esegue la generazione delle risposte e la valutazione tramite LLM per + un elenco di domande. Restituisce i dettagli dei risultati e aggiorna + la cache dei risultati salvati. + """ + questions_df = load_questions() + + def get_question_data(qid: str): + row = questions_df[questions_df['id'] == str(qid)] + if row.empty: + return None + question = row.iloc[0].get('domanda', row.iloc[0].get('question', '')) + expected = row.iloc[0].get('risposta_attesa', row.iloc[0].get('expected_answer', '')) + if not question or not isinstance(question, str) or question.strip() == "": + return None + if not expected or not isinstance(expected, str) or expected.strip() == "": + expected = "Risposta non disponibile" + return {'question': question, 'expected_answer': expected} + + results: Dict = {} + for q_id in question_ids: + q_data = get_question_data(q_id) + if not q_data: + continue + generation_output = generate_example_answer_with_llm( + q_data['question'], + client_config=gen_preset_config, + show_api_details=show_api_details, + ) + actual_answer = generation_output.get('answer') + generation_api_details = generation_output.get('api_details') + + if actual_answer is None: + results[q_id] = { + 'question': q_data['question'], + 'expected_answer': q_data['expected_answer'], + 'actual_answer': 'Errore Generazione', + 'evaluation': {'score': 0, 'explanation': 'Generazione fallita'}, + 'generation_api_details': generation_api_details, + } + continue + + evaluation = evaluate_answer( + q_data['question'], + q_data['expected_answer'], + actual_answer, + client_config=eval_preset_config, + show_api_details=show_api_details, + ) + results[q_id] = { + 'question': q_data['question'], + 'expected_answer': q_data['expected_answer'], + 'actual_answer': actual_answer, + 'evaluation': evaluation, + 'generation_api_details': generation_api_details, + } + + if not results: + return {} + + avg_score = sum(r['evaluation']['score'] for r in results.values()) / len(results) + result_data = { + 'set_name': set_name, + 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'avg_score': avg_score, + 'sample_type': 'Generata da LLM', + 'method': 'LLM', + 'generation_llm': gen_preset_config.get('model'), + 'evaluation_llm': eval_preset_config.get('model'), + 'questions': results, + } + + result_id = add_result(set_id, result_data) + results_df = load_results() + + return { + 
'result_id': result_id, + 'avg_score': avg_score, + 'results': results, + 'results_df': results_df, + } diff --git a/db.config.example b/db.config.example index 9d4ab66..a35d8de 100644 --- a/db.config.example +++ b/db.config.example @@ -6,7 +6,7 @@ database=llm_platform port=3306 ssl_ca= -# For Docker environment, use: +# Per l'ambiente Docker, utilizzare: # host=db # user=root # password= diff --git a/initialize_db.py b/initialize_db.py index ea0622a..f689b4d 100644 --- a/initialize_db.py +++ b/initialize_db.py @@ -19,4 +19,3 @@ except Exception as e: logging.error(f"Errore durante l'inizializzazione del database: {e}") logging.exception("Traceback dettagliato:") - diff --git a/logging_config.py b/logging_config.py index c03423d..6915a26 100644 --- a/logging_config.py +++ b/logging_config.py @@ -2,9 +2,8 @@ def setup_logging(level: int = logging.INFO) -> None: - """Configure root logger with a basic format.""" + """Configura il logger root con un formato di base.""" logging.basicConfig( level=level, format="%(asctime)s - %(levelname)s - %(message)s", ) - diff --git a/models/api_preset.py b/models/api_preset.py index e1e1992..164fd1b 100644 --- a/models/api_preset.py +++ b/models/api_preset.py @@ -1,9 +1,10 @@ from dataclasses import dataclass -from typing import Optional import pandas as pd -from sqlalchemy import text +from sqlalchemy import select + +from models.db_utils import get_session +from models.orm_models import APIPresetORM -from models.db_utils import get_engine @dataclass class APIPreset: @@ -18,30 +19,61 @@ class APIPreset: @staticmethod def load_all() -> pd.DataFrame: - df = pd.read_sql("SELECT * FROM api_presets", get_engine()) - df['id'] = df['id'].astype(str) - return df + with get_session() as session: + presets = session.execute(select(APIPresetORM)).scalars().all() + data = [ + { + "id": p.id, + "name": p.name, + "provider_name": p.provider_name, + "endpoint": p.endpoint, + "api_key": p.api_key, + "model": p.model, + "temperature": p.temperature, + "max_tokens": p.max_tokens, + } + for p in presets + ] + columns = [ + "id", + "name", + "provider_name", + "endpoint", + "api_key", + "model", + "temperature", + "max_tokens", + ] + return pd.DataFrame(data, columns=columns) @staticmethod def save_df(df: pd.DataFrame) -> None: - engine = get_engine() - with engine.begin() as conn: - existing_ids = pd.read_sql('SELECT id FROM api_presets', conn)['id'].astype(str).tolist() + with get_session() as session: + existing_ids = session.execute(select(APIPresetORM.id)).scalars().all() incoming_ids = df['id'].astype(str).tolist() for del_id in set(existing_ids) - set(incoming_ids): - conn.execute(text('DELETE FROM api_presets WHERE id=:id'), {'id': del_id}) + obj = session.get(APIPresetORM, del_id) + if obj: + session.delete(obj) for _, row in df.iterrows(): - # Convert NaN values from Pandas to None so that SQLAlchemy can - # correctly insert NULLs into the database instead of the string - # "nan" which would raise a ProgrammingError with MySQL. 
params = {k: (None if pd.isna(v) else v) for k, v in row.to_dict().items()} - if row['id'] in existing_ids: - conn.execute(text('''UPDATE api_presets SET name=:name, provider_name=:provider_name, endpoint=:endpoint, api_key=:api_key, model=:model, temperature=:temperature, max_tokens=:max_tokens WHERE id=:id'''), params) + obj = session.get(APIPresetORM, params['id']) + if obj: + obj.name = params['name'] + obj.provider_name = params['provider_name'] + obj.endpoint = params['endpoint'] + obj.api_key = params['api_key'] + obj.model = params['model'] + obj.temperature = params['temperature'] + obj.max_tokens = params['max_tokens'] else: - conn.execute(text('''INSERT INTO api_presets (id, name, provider_name, endpoint, api_key, model, temperature, max_tokens) VALUES (:id, :name, :provider_name, :endpoint, :api_key, :model, :temperature, :max_tokens)'''), params) + session.add(APIPresetORM(**params)) + session.commit() @staticmethod def delete(preset_id: str) -> None: - engine = get_engine() - with engine.begin() as conn: - conn.execute(text('DELETE FROM api_presets WHERE id=:id'), {'id': preset_id}) + with get_session() as session: + obj = session.get(APIPresetORM, preset_id) + if obj: + session.delete(obj) + session.commit() diff --git a/models/cached_data.py b/models/cached_data.py new file mode 100644 index 0000000..accd4c4 --- /dev/null +++ b/models/cached_data.py @@ -0,0 +1,20 @@ +from models.api_preset import APIPreset +from models.question import Question +from models.question_set import QuestionSet +from models.test_result import TestResult + + +def get_questions(): + return Question.load_all() + + +def get_question_sets(): + return QuestionSet.load_all() + + +def get_api_presets(): + return APIPreset.load_all() + + +def get_results(): + return TestResult.load_all() diff --git a/models/db_utils.py b/models/db_utils.py index bcefa53..9eb424c 100644 --- a/models/db_utils.py +++ b/models/db_utils.py @@ -1,6 +1,7 @@ import configparser from pathlib import Path from sqlalchemy import create_engine, text +from sqlalchemy.orm import declarative_base, sessionmaker def _ensure_database(cfg): @@ -12,7 +13,10 @@ def _ensure_database(cfg): with engine.begin() as conn: conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) + +Base = declarative_base() _engine = None +_SessionFactory = None def get_engine(): @@ -38,59 +42,18 @@ def get_engine(): return _engine +def get_session(): + """Restituisce una nuova sessione ORM.""" + global _SessionFactory + engine = get_engine() + if _SessionFactory is None: + _SessionFactory = sessionmaker(bind=engine) + return _SessionFactory() + + def init_db(): """Crea le tabelle necessarie se non esistono.""" engine = get_engine() - with engine.begin() as conn: - conn.execute( - text( - """CREATE TABLE IF NOT EXISTS questions ( - id VARCHAR(36) PRIMARY KEY, - domanda TEXT, - risposta_attesa TEXT, - categoria TEXT - )""" - ) - ) - conn.execute( - text( - """CREATE TABLE IF NOT EXISTS question_sets ( - id VARCHAR(36) PRIMARY KEY, - name TEXT - )""" - ) - ) - conn.execute( - text( - """CREATE TABLE IF NOT EXISTS question_set_questions ( - set_id VARCHAR(36), - question_id VARCHAR(36), - PRIMARY KEY (set_id, question_id) - )""" - ) - ) - conn.execute( - text( - """CREATE TABLE IF NOT EXISTS test_results ( - id VARCHAR(36) PRIMARY KEY, - set_id VARCHAR(36), - timestamp TEXT, - results JSON - )""" - ) - ) - conn.execute( - text( - """CREATE TABLE IF NOT EXISTS api_presets ( - id VARCHAR(36) PRIMARY KEY, - name TEXT, - provider_name TEXT, - endpoint TEXT, - 
api_key TEXT, - model TEXT, - temperature FLOAT, - max_tokens INT - )""" - ) - ) - + # Assicura che tutti i modelli ORM siano registrati + import models.orm_models # noqa: F401 + Base.metadata.create_all(engine) diff --git a/models/orm_models.py b/models/orm_models.py new file mode 100644 index 0000000..b3414e1 --- /dev/null +++ b/models/orm_models.py @@ -0,0 +1,58 @@ +"""Modelli ORM SQLAlchemy per i dati dell'applicazione.""" + +# mypy: ignore-errors + +from sqlalchemy import Column, String, Text, Float, Integer, ForeignKey, Table, JSON +from sqlalchemy.orm import relationship + +from .db_utils import Base + +# Tabella di associazione per la relazione molti-a-molti tra set e domande +question_set_questions = Table( + "question_set_questions", + Base.metadata, + Column("set_id", String(36), ForeignKey("question_sets.id"), primary_key=True), + Column("question_id", String(36), ForeignKey("questions.id"), primary_key=True), +) + + +class QuestionORM(Base): + __tablename__ = "questions" + id = Column(String(36), primary_key=True) + domanda = Column(Text, nullable=False) + risposta_attesa = Column(Text, nullable=False) + categoria = Column(Text, default="") + + sets = relationship( + "QuestionSetORM", secondary=question_set_questions, back_populates="questions" + ) + + +class QuestionSetORM(Base): + __tablename__ = "question_sets" + id = Column(String(36), primary_key=True) + name = Column(Text, nullable=False) + + questions = relationship( + "QuestionORM", secondary=question_set_questions, back_populates="sets" + ) + + +class TestResultORM(Base): + __tablename__ = "test_results" + id = Column(String(36), primary_key=True) + set_id = Column(String(36)) + timestamp = Column(Text) + results = Column(JSON) + + +class APIPresetORM(Base): + __tablename__ = "api_presets" + id = Column(String(36), primary_key=True) + name = Column(Text) + provider_name = Column(Text) + endpoint = Column(Text) + api_key = Column(Text) + model = Column(Text) + temperature = Column(Float) + max_tokens = Column(Integer) diff --git a/models/question.py b/models/question.py index 6a06f8f..047dfba 100644 --- a/models/question.py +++ b/models/question.py @@ -2,9 +2,11 @@ from typing import Optional import uuid import pandas as pd -from sqlalchemy import text +from sqlalchemy import select, delete + +from models.db_utils import get_session +from models.orm_models import QuestionORM, question_set_questions -from models.db_utils import get_engine @dataclass class Question: @@ -15,51 +17,67 @@ class Question: @staticmethod def load_all() -> pd.DataFrame: - engine = get_engine() - df = pd.read_sql("SELECT * FROM questions", engine) - if 'categoria' not in df.columns: - df['categoria'] = "" - df['id'] = df['id'].astype(str) - df['domanda'] = df['domanda'].astype(str).fillna("") - df['risposta_attesa'] = df['risposta_attesa'].astype(str).fillna("") - df['categoria'] = df['categoria'].astype(str).fillna("") - return df + with get_session() as session: + results = session.execute(select(QuestionORM)).scalars().all() + data = [ + { + "id": q.id, + "domanda": q.domanda or "", + "risposta_attesa": q.risposta_attesa or "", + "categoria": q.categoria or "", + } + for q in results + ] + columns = ["id", "domanda", "risposta_attesa", "categoria"] + return pd.DataFrame(data, columns=columns) @staticmethod def add(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: qid = question_id or str(uuid.uuid4()) - engine = get_engine() - with engine.begin() as conn: - conn.execute( - text( - "INSERT INTO 
questions (id, domanda, risposta_attesa, categoria) VALUES (:id, :domanda, :risposta_attesa, :categoria)" - ), - {"id": qid, "domanda": domanda, "risposta_attesa": risposta_attesa, "categoria": categoria}, + with get_session() as session: + session.add( + QuestionORM( + id=qid, + domanda=domanda, + risposta_attesa=risposta_attesa, + categoria=categoria, + ) ) + session.commit() return qid @staticmethod - def update(question_id: str, domanda: Optional[str] = None, risposta_attesa: Optional[str] = None, categoria: Optional[str] = None) -> None: - updates = [] - params = {"id": question_id} - if domanda is not None: - updates.append("domanda=:domanda") - params["domanda"] = domanda - if risposta_attesa is not None: - updates.append("risposta_attesa=:risposta_attesa") - params["risposta_attesa"] = risposta_attesa - if categoria is not None: - updates.append("categoria=:categoria") - params["categoria"] = categoria - if not updates: - return - engine = get_engine() - with engine.begin() as conn: - conn.execute(text(f"UPDATE questions SET {', '.join(updates)} WHERE id=:id"), params) + def update( + question_id: str, + domanda: Optional[str] = None, + risposta_attesa: Optional[str] = None, + categoria: Optional[str] = None, + ) -> bool: + """Aggiorna una domanda esistente. + + Restituisce ``True`` se l'aggiornamento è andato a buon fine, + ``False`` se la domanda non esiste. + """ + with get_session() as session: + q = session.get(QuestionORM, question_id) + if not q: + return False + if domanda is not None: + q.domanda = domanda + if risposta_attesa is not None: + q.risposta_attesa = risposta_attesa + if categoria is not None: + q.categoria = categoria + session.commit() + return True @staticmethod def delete(question_id: str) -> None: - engine = get_engine() - with engine.begin() as conn: - conn.execute(text("DELETE FROM question_set_questions WHERE question_id=:id"), {"id": question_id}) - conn.execute(text("DELETE FROM questions WHERE id=:id"), {"id": question_id}) + with get_session() as session: + session.execute( + delete(question_set_questions).where(question_set_questions.c.question_id == question_id) + ) + q = session.get(QuestionORM, question_id) + if q: + session.delete(q) + session.commit() diff --git a/models/question_set.py b/models/question_set.py index 0b61f67..dfbdc9d 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -2,9 +2,11 @@ from typing import List, Optional import uuid import pandas as pd -from sqlalchemy import text +from sqlalchemy import select + +from models.db_utils import get_session +from models.orm_models import QuestionSetORM, QuestionORM -from models.db_utils import get_engine @dataclass class QuestionSet: @@ -14,43 +16,54 @@ class QuestionSet: @staticmethod def load_all() -> pd.DataFrame: - engine = get_engine() - sets_df = pd.read_sql("SELECT id, name FROM question_sets", engine) - rel_df = pd.read_sql("SELECT set_id, question_id FROM question_set_questions", engine) - sets_df['questions'] = sets_df['id'].apply(lambda sid: rel_df[rel_df['set_id']==sid]['question_id'].tolist()) - sets_df['id'] = sets_df['id'].astype(str) - sets_df['name'] = sets_df['name'].astype(str).fillna("") - return sets_df + with get_session() as session: + sets = session.execute(select(QuestionSetORM)).scalars().all() + data = [] + for s in sets: + data.append({ + "id": s.id, + "name": s.name or "", + "questions": [q.id for q in s.questions], + }) + columns = ["id", "name", "questions"] + return pd.DataFrame(data, columns=columns) @staticmethod def create(name: str, 
question_ids: Optional[List[str]] = None) -> str: set_id = str(uuid.uuid4()) q_ids = [str(q) for q in (question_ids or [])] - engine = get_engine() - with engine.begin() as conn: - conn.execute(text("INSERT INTO question_sets (id, name) VALUES (:id, :name)"), {"id": set_id, "name": name}) + with get_session() as session: + qs = [] for qid in q_ids: - conn.execute(text("INSERT INTO question_set_questions (set_id, question_id) VALUES (:sid, :qid)"), {"sid": set_id, "qid": qid}) + q_obj = session.get(QuestionORM, qid) + if q_obj: + qs.append(q_obj) + qset = QuestionSetORM(id=set_id, name=name, questions=qs) + session.add(qset) + session.commit() return set_id @staticmethod def update(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: - engine = get_engine() - with engine.begin() as conn: + with get_session() as session: + qset = session.get(QuestionSetORM, set_id) + if not qset: + return if name is not None: - conn.execute(text("UPDATE question_sets SET name=:name WHERE id=:id"), {"id": set_id, "name": name}) + qset.name = name if question_ids is not None: - existing = conn.execute(text("SELECT question_id FROM question_set_questions WHERE set_id=:sid"), {"sid": set_id}).fetchall() - existing_ids = [r[0] for r in existing] - new_ids = [str(q) for q in question_ids] - for qid in set(existing_ids) - set(new_ids): - conn.execute(text("DELETE FROM question_set_questions WHERE set_id=:sid AND question_id=:qid"), {"sid": set_id, "qid": qid}) - for qid in set(new_ids) - set(existing_ids): - conn.execute(text("INSERT INTO question_set_questions (set_id, question_id) VALUES (:sid, :qid)"), {"sid": set_id, "qid": qid}) + qs = [] + for qid in question_ids: + q_obj = session.get(QuestionORM, qid) + if q_obj: + qs.append(q_obj) + qset.questions = qs + session.commit() @staticmethod def delete(set_id: str) -> None: - engine = get_engine() - with engine.begin() as conn: - conn.execute(text("DELETE FROM question_set_questions WHERE set_id=:id"), {"id": set_id}) - conn.execute(text("DELETE FROM question_sets WHERE id=:id"), {"id": set_id}) + with get_session() as session: + qset = session.get(QuestionSetORM, set_id) + if qset: + session.delete(qset) + session.commit() diff --git a/models/test_result.py b/models/test_result.py index 5039410..8e87013 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -1,11 +1,13 @@ from dataclasses import dataclass -from typing import Dict, Optional +from typing import Dict import uuid import json import pandas as pd -from sqlalchemy import text +from sqlalchemy import select + +from models.db_utils import get_session +from models.orm_models import TestResultORM -from models.db_utils import get_engine @dataclass class TestResult: @@ -16,42 +18,62 @@ class TestResult: @staticmethod def load_all() -> pd.DataFrame: - df = pd.read_sql("SELECT * FROM test_results", get_engine()) - if 'results' in df.columns: - df['results'] = df['results'].apply(lambda x: json.loads(x) if isinstance(x, str) else {}) - df['id'] = df['id'].astype(str) - return df + with get_session() as session: + results = session.execute(select(TestResultORM)).scalars().all() + data = [] + for r in results: + data.append({ + "id": r.id, + "set_id": r.set_id, + "timestamp": r.timestamp, + "results": r.results or {}, + }) + columns = ["id", "set_id", "timestamp", "results"] + return pd.DataFrame(data, columns=columns) @staticmethod def save_df(df: pd.DataFrame) -> None: df_to_save = df.copy() if 'results' in df_to_save.columns: - df_to_save['results'] = 
df_to_save['results'].apply(lambda x: json.dumps(x) if isinstance(x, dict) else '{}') - engine = get_engine() - with engine.begin() as conn: - existing_ids = pd.read_sql('SELECT id FROM test_results', conn)['id'].astype(str).tolist() + df_to_save['results'] = df_to_save['results'].apply( + lambda x: json.dumps(x) if isinstance(x, dict) else '{}' + ) + with get_session() as session: + existing_ids = session.execute(select(TestResultORM.id)).scalars().all() incoming_ids = df_to_save['id'].astype(str).tolist() for rid in set(existing_ids) - set(incoming_ids): - conn.execute(text('DELETE FROM test_results WHERE id=:id'), {'id': rid}) + obj = session.get(TestResultORM, rid) + if obj: + session.delete(obj) for _, row in df_to_save.iterrows(): params = row.to_dict() - if row['id'] in existing_ids: - conn.execute(text('''UPDATE test_results SET set_id=:set_id, timestamp=:timestamp, results=:results WHERE id=:id'''), params) + obj = session.get(TestResultORM, params['id']) + if obj: + obj.set_id = params['set_id'] + obj.timestamp = params['timestamp'] + obj.results = json.loads(params['results']) else: - conn.execute(text('''INSERT INTO test_results (id, set_id, timestamp, results) VALUES (:id, :set_id, :timestamp, :results)'''), params) + session.add( + TestResultORM( + id=params['id'], + set_id=params['set_id'], + timestamp=params['timestamp'], + results=json.loads(params['results']), + ) + ) + session.commit() @staticmethod def add(set_id: str, results_data: Dict) -> str: result_id = str(uuid.uuid4()) - engine = get_engine() - with engine.begin() as conn: - conn.execute( - text('INSERT INTO test_results (id, set_id, timestamp, results) VALUES (:id, :set_id, :timestamp, :results)'), - { - 'id': result_id, - 'set_id': set_id, - 'timestamp': results_data.get('timestamp', ''), - 'results': json.dumps(results_data) - } + with get_session() as session: + session.add( + TestResultORM( + id=result_id, + set_id=set_id, + timestamp=results_data.get('timestamp', ''), + results=results_data, + ) ) + session.commit() return result_id diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..46f09cb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[tool.flake8] +max-line-length = 120 +extend-ignore = ["E203", "W503"] + +[tool.mypy] +python_version = "3.11" +strict = false +ignore_missing_imports = true diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..26c4295 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +flake8 +mypy diff --git a/requirements.txt b/requirements.txt index 5db8850..160bebf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,9 @@ openai>=1.0.0 sqlalchemy>=2.0.0 pymysql>=1.0.0 cryptography>=42.0.0 -# Note: uuid and configparser are built-in Python modules +# Dipendenze per i test +pytest>=7.0 +pytest-cov>=4.0 +# Nota: uuid e configparser sono moduli integrati in Python # installa con pip install -r requirements.txt diff --git a/services/__init__.py b/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/cache.py b/services/cache.py new file mode 100644 index 0000000..3335474 --- /dev/null +++ b/services/cache.py @@ -0,0 +1,47 @@ +from functools import lru_cache +import pandas as pd + +from models.question import Question +from models.question_set import QuestionSet +from models.api_preset import APIPreset +from models.test_result import TestResult + + +@lru_cache(maxsize=1) +def get_questions() -> pd.DataFrame: + return Question.load_all() + + +def refresh_questions() -> 
pd.DataFrame: + get_questions.cache_clear() + return get_questions() + + +@lru_cache(maxsize=1) +def get_question_sets() -> pd.DataFrame: + return QuestionSet.load_all() + + +def refresh_question_sets() -> pd.DataFrame: + get_question_sets.cache_clear() + return get_question_sets() + + +@lru_cache(maxsize=1) +def get_api_presets() -> pd.DataFrame: + return APIPreset.load_all() + + +def refresh_api_presets() -> pd.DataFrame: + get_api_presets.cache_clear() + return get_api_presets() + + +@lru_cache(maxsize=1) +def get_results() -> pd.DataFrame: + return TestResult.load_all() + + +def refresh_results() -> pd.DataFrame: + get_results.cache_clear() + return get_results() diff --git a/services/evaluation_service.py b/services/evaluation_service.py new file mode 100644 index 0000000..3bb9e40 --- /dev/null +++ b/services/evaluation_service.py @@ -0,0 +1,147 @@ +import json +import logging +from openai import APIConnectionError, RateLimitError, APIStatusError + +from services import openai_service + +__all__ = ["evaluate_answer"] + + +def evaluate_answer( + question: str, + expected_answer: str, + actual_answer: str, + client_config: dict, + show_api_details: bool = False, +): + """Valuta una risposta utilizzando un LLM specificato tramite client_config.""" + client = openai_service.get_openai_client( + api_key=client_config.get("api_key"), + base_url=client_config.get("endpoint"), + ) + if not client: + return { + "score": 0, + "explanation": "Errore: Client API per la valutazione non configurato.", + "similarity": 0, + "correctness": 0, + "completeness": 0, + } + + prompt = f""" + Sei un valutatore esperto che valuta la qualità delle risposte alle domande. + Domanda: {question} + Risposta Attesa: {expected_answer} + Risposta Effettiva: {actual_answer} + + Valuta la risposta effettiva rispetto alla risposta attesa in base a: + 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? + 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? + 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? + Calcola un punteggio complessivo (0-100) basato su queste metriche. + Fornisci una breve spiegazione della tua valutazione (max 100 parole). 
+ Formatta la tua risposta come un oggetto JSON con questi campi: + - score: il punteggio complessivo (numero) + - explanation: la tua spiegazione (stringa) + - similarity: punteggio di somiglianza (numero) + - correctness: punteggio di correttezza (numero) + - completeness: punteggio di completezza (numero) + Esempio di risposta JSON: + {{ + "score": 95, + "explanation": "La risposta è corretta e completa", + "similarity": 90, + "correctness": 100, + "completeness": 95 + }} + """ + + api_request_details = { + "model": client_config.get("model", openai_service.DEFAULT_MODEL), + "messages": [{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.0), + "max_tokens": client_config.get("max_tokens", 250), + "response_format": {"type": "json_object"}, + } + + api_details_for_log = {} + if show_api_details: + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + choices = getattr(response, "choices", None) + if not choices: + logging.error("Risposta API priva di 'choices' validi") + if show_api_details: + api_details_for_log["response_content"] = "" + return { + "score": 0, + "explanation": "Errore: risposta API non valida.", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + content = choices[0].message.content or "{}" + if show_api_details: + api_details_for_log["response_content"] = content + + try: + evaluation = json.loads(content) + required_keys = [ + "score", + "explanation", + "similarity", + "correctness", + "completeness", + ] + if not all(key in evaluation for key in required_keys): + logging.warning( + f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." + ) + for key in required_keys: + if key not in evaluation: + evaluation[key] = ( + 0 if key != "explanation" else "Valutazione incompleta o formato JSON non corretto." 
+ ) + + evaluation["api_details"] = api_details_for_log + return evaluation + except json.JSONDecodeError: + logging.error( + f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" + ) + return { + "score": 0, + "explanation": f"Errore di decodifica JSON: {content[:100]}...", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") + api_details_for_log["error"] = str(e) + return { + "score": 0, + "explanation": f"Errore API: {type(e).__name__}", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + except Exception as exc: + logging.error( + f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" + ) + api_details_for_log["error"] = str(exc) + return { + "score": 0, + "explanation": f"Errore imprevisto: {type(exc).__name__}", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } diff --git a/services/openai_service.py b/services/openai_service.py new file mode 100644 index 0000000..824ddc2 --- /dev/null +++ b/services/openai_service.py @@ -0,0 +1,93 @@ +"""Utilità di supporto per interagire con l'API di OpenAI.""" + +# mypy: ignore-errors + +import logging +from openai import OpenAI + +DEFAULT_MODEL = "gpt-4o" +DEFAULT_ENDPOINT = "https://api.openai.com/v1" + +# Modelli disponibili per diversi provider (esempio) +OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] +ANTHROPIC_MODELS = [ + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", +] +# Aggiungi altri provider e modelli se necessario +# XAI_MODELS = ["grok-1"] + + +def get_openai_client(api_key: str, base_url: str = None): + """ + Crea e restituisce un client OpenAI configurato. + Parametri: + api_key: La chiave API. + base_url: L'URL base dell'endpoint API (opzionale, default a OpenAI). + Restituisce: + Un'istanza del client OpenAI o None se la chiave API non è fornita. + """ + if not api_key: + logging.warning("Tentativo di creare client OpenAI senza chiave API.") + return None + try: + effective_base_url = ( + base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT + ) + return OpenAI(api_key=api_key, base_url=effective_base_url) + except Exception as exc: + logging.error(f"Errore durante la creazione del client OpenAI: {exc}") + return None + + +def get_available_models_for_endpoint( + provider_name: str, endpoint_url: str = None, api_key: str = None +): + """ + Restituisce una lista di modelli disponibili basata sul provider o tenta di elencarli dall'endpoint. + Parametri: + provider_name: Nome del provider (es. "OpenAI", "Anthropic", "Personalizzato"). + endpoint_url: URL dell'endpoint (rilevante per "Personalizzato"). + api_key: Chiave API per autenticarsi (necessaria per elencare modelli da endpoint personalizzati). + Restituisce: + Una lista di stringhe con i nomi dei modelli. 
+ """ + if provider_name == "OpenAI": + return OPENAI_MODELS + elif provider_name == "Anthropic": + return ANTHROPIC_MODELS + # Aggiungi altri provider predefiniti qui + # elif provider_name == "XAI": + # return XAI_MODELS + elif provider_name == "Personalizzato": + if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): + return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] + + client = get_openai_client(api_key=api_key, base_url=endpoint_url) + if not client: + return ["(Errore creazione client API)", DEFAULT_MODEL] + try: + models = client.models.list() + filtered_models = sorted( + [ + model.id + for model in models + if not any(term in model.id.lower() for term in ["embed", "embedding"]) + and ( + any( + term in model.id.lower() + for term in ["chat", "instruct", "gpt", "claude", "grok"] + ) + or len(model.id.split("-")) > 2 + ) + ] + ) + if not filtered_models: + filtered_models = sorted( + [model.id for model in models if not any(term in model.id.lower() for term in ["embed", "embedding"])] + ) + return filtered_models if filtered_models else [DEFAULT_MODEL] + except Exception: + return ["(Errore recupero modelli)", DEFAULT_MODEL] + return [DEFAULT_MODEL] diff --git a/services/question_service.py b/services/question_service.py new file mode 100644 index 0000000..1d61caa --- /dev/null +++ b/services/question_service.py @@ -0,0 +1,50 @@ +"""Funzioni di utilità per gestire le domande e la relativa cache.""" +import pandas as pd +from models.question import Question +from services.cache import ( + get_questions as _get_questions, + refresh_questions as _refresh_questions, +) + + +def load_questions() -> pd.DataFrame: + """Restituisce tutte le domande utilizzando la cache.""" + return _get_questions() + + +def refresh_questions() -> pd.DataFrame: + """Svuota e ricarica la cache delle domande.""" + return _refresh_questions() + + +def add_question_if_not_exists( + question_id: str, + domanda: str, + risposta_attesa: str, + categoria: str = "", +) -> bool: + """Aggiunge una domanda solo se non esiste già. + + Parametri + ---------- + question_id: str + Identificatore della domanda da aggiungere. + domanda: str + Testo della domanda. + risposta_attesa: str + Risposta attesa. + categoria: str, opzionale + Categoria della domanda. + + Restituisce + ------- + bool + ``True`` se la domanda è stata aggiunta, ``False`` se esisteva già. + """ + df = Question.load_all() + if str(question_id) in df["id"].astype(str).values: + return False + + Question.add(domanda, risposta_attesa, categoria, question_id) + refresh_questions() + return True diff --git a/services/question_set_importer.py b/services/question_set_importer.py new file mode 100644 index 0000000..54f8e0b --- /dev/null +++ b/services/question_set_importer.py @@ -0,0 +1,256 @@ +import os +import json +from typing import Any, Dict, List, Tuple + +import pandas as pd + +from services.question_service import add_question_if_not_exists +from models.question_set import QuestionSet +from services.cache import refresh_question_sets + + +REQUIRED_CSV_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] + + +def parse_input(uploaded_file) -> List[Dict[str, Any]]: + """Analizza un file CSV o JSON in una lista di dizionari di set di domande. + + Ogni elemento della lista restituita è un dizionario con le chiavi ``name`` e + ``questions``. Per i file CSV le righe sono raggruppate per la colonna ``name``. 
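As an illustration of the two upload shapes this importer accepts, here is a minimal sketch; only the column names (REQUIRED_CSV_COLUMNS) and the "list of sets" JSON structure come from the code above, while the set name, IDs and question text are invented for the example, and the `.name` attribute is set the same way the importer tests do.

import io
import json

# Illustrative only: a JSON upload is a list of sets; each entry in "questions"
# is either a full question dict or a bare ID string referencing an existing question.
json_payload = [
    {
        "name": "Capitali",  # hypothetical set name
        "questions": [
            {"id": "1", "domanda": "Qual è la capitale d'Italia?",
             "risposta_attesa": "Roma", "categoria": "geografia"},
            "2",  # reference to an already existing question, by ID only
        ],
    }
]
json_file = io.BytesIO(json.dumps(json_payload).encode("utf-8"))
json_file.name = "sets.json"

# Illustrative only: a CSV upload must provide REQUIRED_CSV_COLUMNS;
# rows sharing the same "name" value are grouped into one set.
csv_file = io.StringIO(
    "name,id,domanda,risposta_attesa,categoria\n"
    "Capitali,1,Qual è la capitale d'Italia?,Roma,geografia\n"
)
csv_file.name = "sets.csv"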
+ + Solleva + ------ + ValueError + Se il file non contiene le colonne richieste o contiene JSON non valido. + """ + file_extension = os.path.splitext(uploaded_file.name)[1].lower() + + if file_extension == ".csv": + df = pd.read_csv(uploaded_file) + missing = [c for c in REQUIRED_CSV_COLUMNS if c not in df.columns] + if missing: + raise ValueError( + "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_CSV_COLUMNS) + ) + + sets_dict: Dict[str, List[Dict[str, str]]] = {} + for _, row in df.iterrows(): + name = str(row["name"]).strip() + if not name: + continue + question = { + "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", + "domanda": str(row["domanda"]).strip() + if not pd.isna(row["domanda"]) + else "", + "risposta_attesa": str(row["risposta_attesa"]).strip() + if not pd.isna(row["risposta_attesa"]) + else "", + "categoria": str(row["categoria"]).strip() + if not pd.isna(row["categoria"]) + else "", + } + sets_dict.setdefault(name, []).append(question) + return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] + + # Analisi JSON + string_data = uploaded_file.getvalue().decode("utf-8") + data = json.loads(string_data) + if not isinstance(data, list): + raise ValueError( + "Formato JSON non valido. Il file deve contenere una lista (array) di set." + ) + return data + + +def resolve_question_ids( + questions_in_set_data: List[Any], + current_questions: pd.DataFrame, +) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]: + """Risolve gli identificatori delle domande per un set di domande. + + Parametri + ---------- + questions_in_set_data: + Una lista che descrive le domande in un set. Ogni elemento può essere + un dizionario con i dettagli oppure una stringa identificativa. + current_questions: + DataFrame contenente le domande attualmente note. + + Restituisce + ------- + question_ids: List[str] + La lista degli identificatori di domanda risolti. + current_questions: pd.DataFrame + DataFrame aggiornato che include eventuali nuove domande create. + new_added: int + Numero di domande create durante il processo. + existing_found: int + Numero di domande già esistenti trovate. + warnings: List[str] + Eventuali avvisi riscontrati durante la risoluzione. + """ + warnings: List[str] = [] + question_ids: List[str] = [] + new_added = 0 + existing_found = 0 + + for q_idx, q_data in enumerate(questions_in_set_data): + if isinstance(q_data, dict): + q_id = str(q_data.get("id", "")) + q_text = q_data.get("domanda", "") + q_answer = q_data.get("risposta_attesa", "") + q_category = q_data.get("categoria", "") + else: + q_id = str(q_data) + q_text = "" + q_answer = "" + q_category = "" + + if not q_id: + warnings.append( + f"Domanda #{q_idx + 1} senza ID (saltata)." 
+ ) + continue + + if q_text and q_answer: + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + was_added = add_question_if_not_exists( + question_id=q_id, + domanda=q_text, + risposta_attesa=q_answer, + categoria=q_category, + ) + if was_added: + new_added += 1 + question_ids.append(q_id) + new_row = pd.DataFrame( + { + "id": [q_id], + "domanda": [q_text], + "risposta_attesa": [q_answer], + "categoria": [q_category], + } + ) + current_questions = pd.concat([current_questions, new_row], ignore_index=True) + else: + existing_found += 1 + question_ids.append(q_id) + continue + + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + warnings.append( + f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata." + ) + + return question_ids, current_questions, new_added, existing_found, warnings + + +def persist_sets( + sets_data: List[Dict[str, Any]], + current_questions: pd.DataFrame, + current_sets: pd.DataFrame, +) -> Dict[str, Any]: + """Crea set di domande dai dati analizzati. + + Parametri + ---------- + sets_data: + Dati elaborati che descrivono i set da creare. + current_questions: + DataFrame delle domande attualmente note. + current_sets: + DataFrame dei set di domande esistenti. + + Restituisce + ------- + Dict[str, Any] + Un dizionario contenente conteggi, avvisi, flag di successo e + i DataFrame aggiornati per domande e set. + """ + sets_imported_count = 0 + new_questions_added_count = 0 + existing_questions_found_count = 0 + warnings: List[str] = [] + + for set_idx, set_data in enumerate(sets_data): + if not isinstance(set_data, dict): + warnings.append( + f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)." + ) + continue + + set_name = set_data.get("name") + questions_in_set_data = set_data.get("questions", []) + + if not set_name or not isinstance(set_name, str) or not set_name.strip(): + warnings.append( + f"Set #{set_idx + 1} con nome mancante o non valido (saltato)." + ) + continue + + if not isinstance(questions_in_set_data, list): + warnings.append( + f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." + ) + continue + + if set_name in current_sets.get("name", pd.Series([])).values: + warnings.append( + f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." + ) + continue + + question_ids, current_questions, added, existing, q_warnings = resolve_question_ids( + questions_in_set_data, current_questions + ) + warnings.extend(q_warnings) + + if question_ids or len(questions_in_set_data) == 0: + try: + QuestionSet.create(set_name, question_ids) + sets_imported_count += 1 + except Exception as e: + warnings.append( + f"Errore durante la creazione del set '{set_name}': {e}" + ) + else: + warnings.append( + f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." + ) + + new_questions_added_count += added + existing_questions_found_count += existing + + sets_df = refresh_question_sets() + + success = sets_imported_count > 0 or new_questions_added_count > 0 + success_message = "" + if success: + parts = [] + if sets_imported_count > 0: + parts.append(f"{sets_imported_count} set importati") + if new_questions_added_count > 0: + parts.append(f"{new_questions_added_count} nuove domande aggiunte") + if existing_questions_found_count > 0: + parts.append( + f"{existing_questions_found_count} domande esistenti referenziate" + ) + success_message = ". 
".join(parts) + "." + + return { + "sets_imported_count": sets_imported_count, + "new_questions_added_count": new_questions_added_count, + "existing_questions_found_count": existing_questions_found_count, + "questions_df": current_questions, + "sets_df": sets_df, + "warnings": warnings, + "success": success, + "success_message": success_message, + } diff --git a/tests/test_evaluation_service.py b/tests/test_evaluation_service.py new file mode 100644 index 0000000..aab6f64 --- /dev/null +++ b/tests/test_evaluation_service.py @@ -0,0 +1,88 @@ +import json +import logging +import os +import sys +from unittest.mock import Mock, patch + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from services import evaluation_service + + +def _mock_response(content: str): + mock_resp = Mock() + mock_choice = Mock() + mock_choice.message = Mock() + mock_choice.message.content = content + mock_resp.choices = [mock_choice] + return mock_resp + + +def _mock_response_no_choices(): + mock_resp = Mock() + mock_resp.choices = [] + return mock_resp + + +@patch("services.evaluation_service.openai_service.get_openai_client") +def test_evaluate_answer_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + + evaluation = { + "score": 90, + "explanation": "good", + "similarity": 90, + "correctness": 90, + "completeness": 90, + } + mock_client.chat.completions.create.return_value = _mock_response( + json.dumps(evaluation) + ) + + result = evaluation_service.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"}, show_api_details=True + ) + + assert result["score"] == 90 + assert result["similarity"] == 90 + assert "api_details" in result + + +@patch("services.evaluation_service.openai_service.get_openai_client", return_value=None) +def test_evaluate_answer_no_client(mock_get_client): + result = evaluation_service.evaluate_answer( + "q", "expected", "actual", {"api_key": None} + ) + + assert result["score"] == 0 + assert "Client API" in result["explanation"] + + +@patch("services.evaluation_service.openai_service.get_openai_client") +def test_evaluate_answer_json_decode_error(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response("not json") + + result = evaluation_service.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert result["score"] == 0 + assert "Errore di decodifica JSON" in result["explanation"] + + +@patch("services.evaluation_service.openai_service.get_openai_client") +def test_evaluate_answer_no_choices(mock_get_client, caplog): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response_no_choices() + + with caplog.at_level(logging.ERROR): + result = evaluation_service.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert result["score"] == 0 + assert "choices" in caplog.text diff --git a/tests/test_openai_controller.py b/tests/test_openai_controller.py new file mode 100644 index 0000000..1b9fce6 --- /dev/null +++ b/tests/test_openai_controller.py @@ -0,0 +1,112 @@ +import os +import sys +from unittest.mock import Mock, patch + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import openai_controller # noqa: E402 + + +def _mock_response(content: str): + mock_resp = Mock() + mock_choice = Mock() + mock_choice.message = Mock() + mock_choice.message.content = content + mock_resp.choices = 
[mock_choice] + return mock_resp + + +@patch("controllers.openai_controller.evaluation_service.evaluate_answer") +def test_evaluate_answer_delegates(mock_service): + evaluation = { + "score": 90, + "explanation": "good", + "similarity": 90, + "correctness": 90, + "completeness": 90, + } + mock_service.return_value = evaluation + + result = openai_controller.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"}, show_api_details=True + ) + + assert result == evaluation + mock_service.assert_called_once_with( + "q", "expected", "actual", {"api_key": "key"}, True + ) + + +@patch("services.openai_service.get_openai_client") +def test_generate_example_answer_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response(" answer ") + + result = openai_controller.generate_example_answer_with_llm( + "question", {"api_key": "key"} + ) + + assert result["answer"] == "answer" + + +@patch("services.openai_service.get_openai_client", return_value=None) +def test_generate_example_answer_no_client(mock_get_client): + result = openai_controller.generate_example_answer_with_llm( + "question", {"api_key": None}, show_api_details=True + ) + + assert result["answer"] is None + assert result["api_details"]["error"] == "Client API non configurato" + + +@patch("services.openai_service.get_openai_client") +def test_generate_example_answer_empty_question(mock_get_client): + mock_get_client.return_value = Mock() + + result = openai_controller.generate_example_answer_with_llm( + "", {"api_key": "key"}, show_api_details=True + ) + + assert result["answer"] is None + assert result["api_details"]["error"] == "Domanda vuota o non valida" + + +@patch("services.openai_service.get_openai_client") +def test_test_api_connection_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response( + "Connessione riuscita." + ) + + ok, msg = openai_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is True + assert msg == "Connessione API riuscita!" 
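Taken together, the generate_example_answer tests above pin down the controller's return contract. The following is a minimal caller sketch, assuming only the fields these tests assert on ("answer", plus "api_details" when show_api_details=True); the preset values are invented placeholders mirroring the preset fields used elsewhere in the patch.

from controllers import openai_controller

client_config = {  # hypothetical preset values
    "api_key": "sk-...",
    "endpoint": "https://api.openai.com/v1",
    "model": "gpt-4o",
    "temperature": 0.0,
    "max_tokens": 1000,
}

output = openai_controller.generate_example_answer_with_llm(
    "Qual è la capitale d'Italia?", client_config, show_api_details=True
)
if output["answer"] is None:
    # The failure tests above show api_details carries an "error" message.
    print("Generation failed:", output["api_details"].get("error"))
else:
    print("Generated answer:", output["answer"])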
+ + +@patch("services.openai_service.get_openai_client") +def test_test_api_connection_unexpected_response(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response("failure") + + ok, msg = openai_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is False + assert "Risposta inattesa" in msg + + +@patch("services.openai_service.get_openai_client", return_value=None) +def test_test_api_connection_no_client(mock_get_client): + ok, msg = openai_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is False + assert "Client API non inizializzato" in msg diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index 18c3d78..1717288 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -1,18 +1,34 @@ -from controllers import question_controller +import os +import sys +from unittest.mock import patch -def test_add_update_delete_question(): - qid = question_controller.add_question("Domanda?", "Risposta", "cat") - df = question_controller.load_questions() - assert qid in df["id"].values +sys.path.append(os.path.dirname(os.path.dirname(__file__))) - question_controller.update_question(qid, domanda="Nuova domanda", risposta_attesa="Nuova", categoria="newcat") - df2 = question_controller.load_questions() - row = df2[df2["id"] == qid].iloc[0] - assert row["domanda"] == "Nuova domanda" - assert row["risposta_attesa"] == "Nuova" - assert row["categoria"] == "newcat" +from controllers import question_controller # noqa: E402 - question_controller.delete_question(qid) - df3 = question_controller.load_questions() - assert qid not in df3["id"].values + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.update") +def test_update_question_success(mock_update, mock_refresh): + mock_update.return_value = True + + result = question_controller.update_question( + "qid", domanda="d", risposta_attesa="a", categoria="c" + ) + + assert result is True + mock_update.assert_called_once_with("qid", "d", "a", "c") + mock_refresh.assert_called_once() + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.update") +def test_update_question_failure(mock_update, mock_refresh): + mock_update.return_value = False + + result = question_controller.update_question("qid") + + assert result is False + mock_update.assert_called_once_with("qid", None, None, None) + mock_refresh.assert_called_once() diff --git a/tests/test_question_service.py b/tests/test_question_service.py new file mode 100644 index 0000000..e95eb04 --- /dev/null +++ b/tests/test_question_service.py @@ -0,0 +1,40 @@ +import pandas as pd +from unittest.mock import patch + +from services import question_service + + +@patch("services.question_service.refresh_questions") +@patch("services.question_service.Question.add") +@patch("services.question_service.Question.load_all") +def test_add_question_if_not_exists_existing(mock_load_all, mock_add, mock_refresh): + mock_load_all.return_value = pd.DataFrame({"id": ["123"]}) + + result = question_service.add_question_if_not_exists( + question_id="123", + domanda="dom", + risposta_attesa="ans", + categoria="cat", + ) + + assert result is False + mock_add.assert_not_called() + mock_refresh.assert_not_called() + + +@patch("services.question_service.refresh_questions") 
+@patch("services.question_service.Question.add") +@patch("services.question_service.Question.load_all") +def test_add_question_if_not_exists_new(mock_load_all, mock_add, mock_refresh): + mock_load_all.return_value = pd.DataFrame({"id": ["456"]}) + + result = question_service.add_question_if_not_exists( + question_id="123", + domanda="dom", + risposta_attesa="ans", + categoria="cat", + ) + + assert result is True + mock_add.assert_called_once_with("dom", "ans", "cat", "123") + mock_refresh.assert_called_once() diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py new file mode 100644 index 0000000..3f28ad3 --- /dev/null +++ b/tests/test_question_set_importer.py @@ -0,0 +1,90 @@ +import io +import json +from unittest.mock import patch + +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import pandas as pd +import pytest + +from services.question_set_importer import ( + parse_input, + resolve_question_ids, + persist_sets, +) + + +def test_parse_input_csv_missing_columns(): + csv_content = "name,id,domanda\nset1,1,Domanda" + file = io.StringIO(csv_content) + file.name = "test.csv" + with pytest.raises(ValueError): + parse_input(file) + + +def test_parse_input_json_not_list(): + data = {"name": "set1"} + file = io.BytesIO(json.dumps(data).encode("utf-8")) + file.name = "test.json" + with pytest.raises(ValueError): + parse_input(file) + + +@patch("services.question_set_importer.add_question_if_not_exists") +def test_resolve_question_ids_adds_and_existing(mock_add): + mock_add.return_value = True + current_questions = pd.DataFrame( + [{"id": "2", "domanda": "", "risposta_attesa": "", "categoria": ""}] + ) + questions = [ + {"id": "1", "domanda": "Q1", "risposta_attesa": "A1", "categoria": ""}, + {"id": "2"}, + ] + ids, updated_df, new_added, existing_found, warnings = resolve_question_ids( + questions, current_questions + ) + assert ids == ["1", "2"] + assert new_added == 1 + assert existing_found == 1 + assert warnings == [] + assert "1" in updated_df["id"].values + mock_add.assert_called_once() + + +def test_resolve_question_ids_missing_id(): + current_questions = pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ) + questions = [{"domanda": "Q", "risposta_attesa": "A"}] + ids, updated_df, new_added, existing_found, warnings = resolve_question_ids( + questions, current_questions + ) + assert ids == [] + assert new_added == 0 + assert existing_found == 0 + assert len(warnings) == 1 + assert updated_df.empty + + +@patch("services.question_set_importer.refresh_question_sets") +@patch("services.question_set_importer.QuestionSet.create") +def test_persist_sets_skips_duplicates(mock_create, mock_refresh): + mock_refresh.return_value = pd.DataFrame( + [{"id": "s1", "name": "Existing", "questions": []}] + ) + current_questions = pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ) + current_sets = pd.DataFrame( + [{"id": "s1", "name": "Existing", "questions": []}] + ) + sets_data = [ + {"name": "Existing", "questions": []}, + {"name": "New", "questions": []}, + ] + result = persist_sets(sets_data, current_questions, current_sets) + assert result["sets_imported_count"] == 1 + assert any("esiste già" in w for w in result["warnings"]) + mock_create.assert_called_once_with("New", []) diff --git a/tests/test_statistics.py b/tests/test_statistics.py new file mode 100644 index 0000000..6b1f9f3 --- /dev/null +++ b/tests/test_statistics.py @@ -0,0 +1,43 @@ +import os +import sys +import 
pytest + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from controllers.test_controller import calculate_statistics # noqa: E402 + + +def test_calculate_statistics(): + results = { + "q1": { + "question": "Domanda 1", + "evaluation": { + "score": 80, + "similarity": 70, + "correctness": 90, + "completeness": 60, + }, + }, + "q2": { + "question": "Domanda 2", + "evaluation": { + "score": 60, + "similarity": 50, + "correctness": 40, + "completeness": 80, + }, + }, + } + stats = calculate_statistics(results) + assert stats["avg_score"] == pytest.approx(70.0) + assert len(stats["per_question_scores"]) == 2 + assert {"question": "Domanda 1", "score": 80} in stats["per_question_scores"] + assert stats["radar_metrics"]["similarity"] == pytest.approx(60.0) + assert stats["radar_metrics"]["correctness"] == pytest.approx(65.0) + assert stats["radar_metrics"]["completeness"] == pytest.approx(70.0) + + +def test_calculate_statistics_empty(): + stats = calculate_statistics({}) + assert stats["avg_score"] == 0 + assert stats["per_question_scores"] == [] + assert stats["radar_metrics"] == {"similarity": 0, "correctness": 0, "completeness": 0} diff --git a/view/__init__.py b/view/__init__.py new file mode 100644 index 0000000..c5fcef4 --- /dev/null +++ b/view/__init__.py @@ -0,0 +1 @@ +"""View package.""" diff --git a/view/api_configurazione.py b/view/api_configurazione.py index d1e88e0..852a6a5 100644 --- a/view/api_configurazione.py +++ b/view/api_configurazione.py @@ -1,33 +1,26 @@ import streamlit as st -import uuid -import pandas as pd -from controllers.openai_controller import ( - test_api_connection, DEFAULT_MODEL, DEFAULT_ENDPOINT -) +from controllers.openai_controller import test_api_connection from view.style_utils import add_page_header, add_section_title -from view.component_utils import create_card -from controllers.api_preset_controller import load_presets, save_presets, delete_preset - -add_page_header( - "Gestione Preset API", - icon="⚙️", - description="Crea, visualizza, testa ed elimina i preset di configurazione API per LLM." 
+from controllers.api_preset_controller import ( + save_preset, + delete_preset, + load_presets, + list_presets, + get_preset_by_id, + validate_preset, + get_default_api_settings, ) -# Stato della sessione per la gestione del form di creazione/modifica preset -if "editing_preset" not in st.session_state: st.session_state.editing_preset = False -if "current_preset_edit_id" not in st.session_state: st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica -if "preset_form_data" not in st.session_state: st.session_state.preset_form_data = {} - -# Carica sempre i preset API dal database -st.session_state.api_presets = load_presets() +DEFAULT_API_SETTINGS = get_default_api_settings() +DEFAULT_MODEL = DEFAULT_API_SETTINGS["model"] +DEFAULT_ENDPOINT = DEFAULT_API_SETTINGS["endpoint"] # Funzioni di callback per i pulsanti del form def start_new_preset_edit(): st.session_state.editing_preset = True - st.session_state.current_preset_edit_id = None # Indica nuovo preset + st.session_state.current_preset_edit_id = None # Indica nuovo preset st.session_state.preset_form_data = { "name": "", "endpoint": DEFAULT_ENDPOINT, @@ -37,22 +30,32 @@ def start_new_preset_edit(): "max_tokens": 1000 } + def start_existing_preset_edit(preset_id): - preset_to_edit = st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() + preset_to_edit = get_preset_by_id(preset_id, st.session_state.api_presets) + if not preset_to_edit: + st.error("Preset non trovato.") + return st.session_state.editing_preset = True st.session_state.current_preset_edit_id = preset_id st.session_state.preset_form_data = preset_to_edit.copy() # Assicura che i campi numerici siano del tipo corretto per gli slider/number_input - st.session_state.preset_form_data["temperature"] = float(st.session_state.preset_form_data.get("temperature", 0.0)) - st.session_state.preset_form_data["max_tokens"] = int(st.session_state.preset_form_data.get("max_tokens", 1000)) + st.session_state.preset_form_data["temperature"] = float( + st.session_state.preset_form_data.get("temperature", 0.0) + ) + st.session_state.preset_form_data["max_tokens"] = int( + st.session_state.preset_form_data.get("max_tokens", 1000) + ) if "endpoint" not in st.session_state.preset_form_data: st.session_state.preset_form_data["endpoint"] = DEFAULT_ENDPOINT + def cancel_preset_edit(): st.session_state.editing_preset = False st.session_state.current_preset_edit_id = None st.session_state.preset_form_data = {} + def save_preset_from_form(): """Salva un preset leggendo i valori direttamente dagli input della form.""" # Recupera sempre i valori correnti dei widget dal session_state @@ -86,181 +89,196 @@ def save_preset_from_form(): ) form_data = st.session_state.preset_form_data.copy() - - if not preset_name: - st.error("Il nome del preset non può essere vuoto.") - return - current_id = st.session_state.current_preset_edit_id - presets_df = st.session_state.api_presets - - # Controlla se il nome del preset esiste già (escludendo il preset corrente se in modifica) - existing_names = presets_df["name"].tolist() - if current_id: - current_preset_original_name = presets_df[presets_df["id"] == current_id].iloc[0]["name"] - if preset_name != current_preset_original_name and preset_name in existing_names: - st.error(f"Un altro preset con nome '{preset_name}' esiste già.") - return - elif preset_name in existing_names: - st.error(f"Un preset con nome '{preset_name}' esiste già.") + + is_valid, validation_message = validate_preset(form_data, 
current_id) + if not is_valid: + st.error(validation_message) return - # Prepara i dati del preset da salvare - preset_data_to_save = { - "name": preset_name, # Usa il valore validato - "endpoint": form_data.get("endpoint"), - "api_key": form_data.get("api_key"), - "model": form_data.get("model"), - "temperature": float(form_data.get("temperature", 0.0)), - "max_tokens": int(form_data.get("max_tokens", 1000)) - } + success, message, updated_df = save_preset(form_data, current_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + cancel_preset_edit() # Chiudi il form + else: + st.error(message) - if current_id: # Modifica preset esistente - idx = presets_df.index[presets_df["id"] == current_id].tolist()[0] - for key, value in preset_data_to_save.items(): - presets_df.loc[idx, key] = value - st.success(f"Preset '{preset_name}' aggiornato con successo!") - else: # Crea nuovo preset - new_id = str(uuid.uuid4()) - preset_data_to_save["id"] = new_id - new_preset_df = pd.DataFrame([preset_data_to_save]) - presets_df = pd.concat([presets_df, new_preset_df], ignore_index=True) - st.success(f"Preset '{preset_name}' creato con successo!") - - st.session_state.api_presets = presets_df - save_presets(presets_df) - cancel_preset_edit() # Chiudi il form def delete_preset_callback(preset_id): - presets_df = st.session_state.api_presets - preset_name_to_delete = presets_df[presets_df["id"] == preset_id].iloc[0]["name"] - st.session_state.api_presets = presets_df[presets_df["id"] != preset_id] - save_presets(st.session_state.api_presets) - delete_preset(preset_id) - st.success(f"Preset '{preset_name_to_delete}' eliminato.") - if st.session_state.current_preset_edit_id == preset_id: - cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form - -# Sezione per visualizzare/modificare i preset -if st.session_state.editing_preset: - add_section_title("Modifica/Crea Preset API", icon="✏️") - form_data = st.session_state.preset_form_data - - with st.form(key="preset_form"): - # Usa un key specifico per il campo nome e aggiorna il form_data - form_data["name"] = st.text_input( - "Nome del Preset", - value=form_data.get("name", ""), - key="preset_name", # Key esplicita per il campo nome - help="Un nome univoco per questo preset." - ) - - # Campo chiave API con key esplicita - form_data["api_key"] = st.text_input( - "Chiave API", - value=form_data.get("api_key", ""), - type="password", - key="preset_api_key", # Key esplicita per la chiave API - help="La tua chiave API per il provider selezionato." 
- ) - - # Campo endpoint con key esplicita - form_data["endpoint"] = st.text_input( - "Provider Endpoint", - value=form_data.get("endpoint", DEFAULT_ENDPOINT), - placeholder="https://api.openai.com/v1", - key="preset_endpoint", # Key esplicita per l'endpoint - help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" - ) - - # Modello sempre personalizzabile - form_data["model"] = st.text_input( - "Modello", - value=form_data.get("model", DEFAULT_MODEL), - placeholder="gpt-4o", - key="preset_model", # Key esplicita per il modello - help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" - ) + success, message, updated_df = delete_preset(preset_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + if st.session_state.current_preset_edit_id == preset_id: + cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form + else: + st.error(message) - form_data["temperature"] = st.slider( - "Temperatura", - 0.0, - 2.0, - float(form_data.get("temperature", 0.0)), - 0.1, - key="preset_temperature", - ) - form_data["max_tokens"] = st.number_input( - "Max Tokens", - min_value=50, - max_value=8000, - value=int(form_data.get("max_tokens", 1000)), - step=50, - key="preset_max_tokens", - ) - - # Campo Test Connessione e pulsanti di salvataggio/annullamento - # Pulsante Test Connessione - if st.form_submit_button("⚡ Testa Connessione API"): - # Usa direttamente i valori dal session_state per il test - api_key_to_test = st.session_state.get("preset_api_key", "") - endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) - model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) - - with st.spinner("Test in corso..."): - success, message = test_api_connection( - api_key=api_key_to_test, - endpoint=endpoint_to_test, - model=model_to_test, - temperature=form_data.get("temperature", 0.0), - max_tokens=form_data.get("max_tokens", 1000) - ) - if success: - st.success(message) - else: - st.error(message) - - # Pulsanti Salva e Annulla - cols_form_buttons = st.columns(2) - with cols_form_buttons[0]: - if st.form_submit_button("💾 Salva Preset", on_click=save_preset_from_form, type="primary", use_container_width=True): - pass # Il callback gestisce il salvataggio - with cols_form_buttons[1]: - if st.form_submit_button("❌ Annulla", on_click=cancel_preset_edit, use_container_width=True): - pass # Il callback gestisce il cambio di stato -else: - add_section_title("Preset API Salvati", icon="🗂️") - if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): - pass # Il callback gestisce il cambio di stato - - if st.session_state.api_presets.empty: - st.info("Nessun preset API salvato. Clicca su 'Crea Nuovo Preset' per iniziare.") + +def render(): + add_page_header( + "Gestione Preset API", + icon="⚙️", + description="Crea, visualizza, testa ed elimina i preset di configurazione API per LLM." 
+ ) + + # Stato della sessione per la gestione del form di creazione/modifica preset + if "editing_preset" not in st.session_state: + st.session_state.editing_preset = False + if "current_preset_edit_id" not in st.session_state: + st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica + if "preset_form_data" not in st.session_state: + st.session_state.preset_form_data = {} + + # Carica i preset API utilizzando la cache + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + + # Sezione per visualizzare/modificare i preset + if st.session_state.editing_preset: + add_section_title("Modifica/Crea Preset API", icon="✏️") + form_data = st.session_state.preset_form_data + + with st.form(key="preset_form"): + # Usa un key specifico per il campo nome e aggiorna il form_data + form_data["name"] = st.text_input( + "Nome del Preset", + value=form_data.get("name", ""), + key="preset_name", # Key esplicita per il campo nome + help="Un nome univoco per questo preset." + ) + + # Campo chiave API con key esplicita + form_data["api_key"] = st.text_input( + "Chiave API", + value=form_data.get("api_key", ""), + type="password", + key="preset_api_key", # Key esplicita per la chiave API + help="La tua chiave API per il provider selezionato." + ) + + # Campo endpoint con key esplicita + form_data["endpoint"] = st.text_input( + "Provider Endpoint", + value=form_data.get("endpoint", DEFAULT_ENDPOINT), + placeholder="https://api.openai.com/v1", + key="preset_endpoint", # Key esplicita per l'endpoint + help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" + ) + + # Modello sempre personalizzabile + form_data["model"] = st.text_input( + "Modello", + value=form_data.get("model", DEFAULT_MODEL), + placeholder="gpt-4o", + key="preset_model", # Key esplicita per il modello + help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" + ) + + form_data["temperature"] = st.slider( + "Temperatura", + 0.0, + 2.0, + float(form_data.get("temperature", 0.0)), + 0.1, + key="preset_temperature", + ) + form_data["max_tokens"] = st.number_input( + "Max Tokens", + min_value=50, + max_value=8000, + value=int(form_data.get("max_tokens", 1000)), + step=50, + key="preset_max_tokens", + ) + + # Campo Test Connessione e pulsanti di salvataggio/annullamento + # Pulsante Test Connessione + if st.form_submit_button("⚡ Testa Connessione API"): + # Usa direttamente i valori dal session_state per il test + api_key_to_test = st.session_state.get("preset_api_key", "") + endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) + + with st.spinner("Test in corso..."): + success, message = test_api_connection( + api_key=api_key_to_test, + endpoint=endpoint_to_test, + model=model_to_test, + temperature=form_data.get("temperature", 0.0), + max_tokens=form_data.get("max_tokens", 1000) + ) + if success: + st.success(message) + else: + st.error(message) + + # Pulsanti Salva e Annulla + cols_form_buttons = st.columns(2) + with cols_form_buttons[0]: + if st.form_submit_button( + "💾 Salva Preset", + on_click=save_preset_from_form, + type="primary", + use_container_width=True, + ): + pass # Il callback gestisce il salvataggio + with cols_form_buttons[1]: + if st.form_submit_button( + "❌ Annulla", + on_click=cancel_preset_edit, + use_container_width=True, + ): + pass # Il callback gestisce il cambio di stato else: - for index, preset in 
st.session_state.api_presets.iterrows(): - with st.container(): - st.markdown(f"#### {preset['name']}") - cols_preset_details = st.columns([3, 1, 1]) - with cols_preset_details[0]: - st.caption(f"Modello: {preset.get('model', 'N/A')}") - st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") - with cols_preset_details[1]: - if st.button("✏️ Modifica", key=f"edit_{preset['id']}", on_click=start_existing_preset_edit, args=(preset['id'],), use_container_width=True): - pass - with cols_preset_details[2]: - if st.button("🗑️ Elimina", key=f"delete_{preset['id']}", on_click=delete_preset_callback, args=(preset['id'],), type="secondary", use_container_width=True): - pass - st.divider() - -# Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) -if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui - st.success(st.session_state.preset_applied_message) - del st.session_state.preset_applied_message - -if "preset_saved_message" in st.session_state: - st.success(st.session_state.preset_saved_message) - del st.session_state.preset_saved_message - -if "preset_deleted_message" in st.session_state: - st.success(st.session_state.preset_deleted_message) - del st.session_state.preset_deleted_message + add_section_title("Preset API Salvati", icon="🗂️") + if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): + pass # Il callback gestisce il cambio di stato + + preset_list = list_presets(st.session_state.api_presets) + if not preset_list: + st.info( + "Nessun preset API salvato. Clicca su 'Crea Nuovo Preset' per iniziare." + ) + else: + for preset in preset_list: + with st.container(): + st.markdown(f"#### {preset['name']}") + cols_preset_details = st.columns([3, 1, 1]) + with cols_preset_details[0]: + st.caption(f"Modello: {preset.get('model', 'N/A')}") + st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") + with cols_preset_details[1]: + if st.button( + "✏️ Modifica", + key=f"edit_{preset['id']}", + on_click=start_existing_preset_edit, + args=(preset['id'],), + use_container_width=True, + ): + pass + with cols_preset_details[2]: + if st.button( + "🗑️ Elimina", + key=f"delete_{preset['id']}", + on_click=delete_preset_callback, + args=(preset['id'],), + type="secondary", + use_container_width=True, + ): + pass + st.divider() + + # Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) + if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui + st.success(st.session_state.preset_applied_message) + del st.session_state.preset_applied_message + + if "preset_saved_message" in st.session_state: + st.success(st.session_state.preset_saved_message) + del st.session_state.preset_saved_message + if "preset_deleted_message" in st.session_state: + st.success(st.session_state.preset_deleted_message) + del st.session_state.preset_deleted_message diff --git a/view/component_utils.py b/view/component_utils.py index 0d602cf..d9b5127 100644 --- a/view/component_utils.py +++ b/view/component_utils.py @@ -174,4 +174,3 @@ def create_metrics_container(metrics_data: list[dict]): metrics_html += '' st.markdown(metrics_html, unsafe_allow_html=True) - diff --git a/view/esecuzione_test.py b/view/esecuzione_test.py index 29fd0de..f7fa14e 100644 --- a/view/esecuzione_test.py +++ b/view/esecuzione_test.py @@ -1,17 +1,9 @@ import streamlit as st -import pandas as pd -import time -from datetime import datetime -from controllers.question_controller 
import load_questions +from controllers.test_controller import execute_llm_test from controllers.question_set_controller import load_sets -from controllers.test_controller import add_result, load_results from controllers.api_preset_controller import load_presets -from controllers.openai_controller import ( - evaluate_answer, generate_example_answer_with_llm -) from view.style_utils import add_page_header, add_section_title -from view.component_utils import create_card # === FUNZIONI DI CALLBACK === @@ -28,203 +20,141 @@ def run_llm_test_callback(): st.session_state.run_llm_test = True -# === Inizializzazione delle variabili di stato === -if 'test_mode' not in st.session_state: - st.session_state.test_mode = "Valutazione Automatica con LLM" -if 'mode_changed' not in st.session_state: - st.session_state.mode_changed = False -if 'run_llm_test' not in st.session_state: - st.session_state.run_llm_test = False - -# Gestisce il cambio di modalità -if st.session_state.mode_changed: - st.session_state.mode_changed = False - st.rerun() - -add_page_header( - "Esecuzione Test", - icon="🧪", - description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." -) - -# Carica sempre i dati necessari dal database -st.session_state.api_presets = load_presets() -st.session_state.question_sets = load_sets() -st.session_state.questions = load_questions() - -if st.session_state.api_presets.empty: - st.error( - "Nessun preset API configurato. Vai alla pagina 'Gestione Preset API' per crearne almeno uno prima di eseguire i test.") - st.stop() - -# Controlla se ci sono set di domande disponibili -if st.session_state.question_sets.empty: - st.warning("Nessun set di domande disponibile. Crea dei set di domande prima di eseguire i test.") - st.stop() - - -# Ottieni testo della domanda e risposta attesa per ID -def get_question_data(question_id): - if 'questions' in st.session_state and not st.session_state.questions.empty: - question_row = st.session_state.questions[st.session_state.questions['id'] == str(question_id)] - if not question_row.empty: - # Assicurati che i nomi delle colonne ('domanda', 'risposta_attesa') coincidano con quelli - # forniti da question_controller.load_questions() - q = question_row.iloc[0].get('domanda', question_row.iloc[0].get('question', '')) - a = question_row.iloc[0].get('risposta_attesa', question_row.iloc[0].get('expected_answer', '')) - - # Verifica che domanda e risposta non siano vuote - if not q or not isinstance(q, str) or q.strip() == "": - st.error(f"La domanda con ID {question_id} è vuota o non valida.") - return None - - if not a or not isinstance(a, str) or a.strip() == "": - st.warning(f"La risposta attesa per la domanda con ID {question_id} è vuota o non valida.") - # Continuiamo comunque ma con una risposta vuota - a = "Risposta non disponibile" - - return {'question': q, 'expected_answer': a} - return None - - -# Seleziona set di domande per il test -add_section_title("Seleziona Set di Domande", icon="📚") -set_options = {} -if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: - for _, row in st.session_state.question_sets.iterrows(): - if 'questions' in row and row['questions']: - set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" - -if not set_options: - st.warning("Nessun set di domande con domande associate. 
Creane uno in 'Gestione Set di Domande'.") - st.stop() - -selected_set_id = st.selectbox( - "Seleziona un set di domande", - options=list(set_options.keys()), - format_func=lambda x: set_options[x], - key="select_question_set_for_test" -) - -selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] -questions_in_set = selected_set['questions'] - - -# --- Opzioni API basate su Preset --- -add_section_title("Opzioni API basate su Preset", icon="🛠️") - -preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} -preset_display_names = list(preset_names_to_id.keys()) - - -def get_preset_config_by_name(name): - preset_id = preset_names_to_id.get(name) - if preset_id: - return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() - return None - - -# Seleziona preset per generazione risposta (comune a entrambe le modalità) -generation_preset_name = st.selectbox( - "Seleziona Preset per Generazione Risposta LLM", - options=preset_display_names, - index=0 if preset_display_names else None, # Seleziona il primo di default - key="generation_preset_select", - help="Il preset API utilizzato per generare la risposta alla domanda." -) -st.session_state.selected_generation_preset_name = generation_preset_name - -# Seleziona preset per valutazione (solo per modalità LLM) -if st.session_state.test_mode == "Valutazione Automatica con LLM": - evaluation_preset_name = st.selectbox( - "Seleziona Preset per Valutazione Risposta LLM", - options=preset_display_names, - index=0 if preset_display_names else None, # Seleziona il primo di default - key="evaluation_preset_select", - help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." +def render(): + # === Inizializzazione delle variabili di stato === + if 'test_mode' not in st.session_state: + st.session_state.test_mode = "Valutazione Automatica con LLM" + if 'mode_changed' not in st.session_state: + st.session_state.mode_changed = False + if 'run_llm_test' not in st.session_state: + st.session_state.run_llm_test = False + + # Gestisce il cambio di modalità + if st.session_state.mode_changed: + st.session_state.mode_changed = False + st.rerun() + + add_page_header( + "Esecuzione Test", + icon="🧪", + description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." ) - st.session_state.selected_evaluation_preset_name = evaluation_preset_name -show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) + # Carica i dati necessari, utilizzando cache e session state + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + if st.session_state.api_presets.empty: + st.error( + "Nessun preset API configurato. Vai alla pagina 'Gestione Preset API' " + "per crearne almeno uno prima di eseguire i test." + ) + st.stop() + + # Controlla se ci sono set di domande disponibili + if st.session_state.question_sets.empty: + st.warning("Nessun set di domande disponibile. 
Crea dei set di domande prima di eseguire i test.") + st.stop() + + # Seleziona set di domande per il test + add_section_title("Seleziona Set di Domande", icon="📚") + set_options = {} + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + for _, row in st.session_state.question_sets.iterrows(): + if 'questions' in row and row['questions']: + set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" + + if not set_options: + st.warning("Nessun set di domande con domande associate. Creane uno in 'Gestione Set di Domande'.") + st.stop() + + selected_set_id = st.selectbox( + "Seleziona un set di domande", + options=list(set_options.keys()), + format_func=lambda x: set_options[x], + key="select_question_set_for_test" + ) -# --- Logica di Esecuzione Test --- -test_mode_selected = st.session_state.test_mode + selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] + questions_in_set = selected_set['questions'] -if test_mode_selected == "Valutazione Automatica con LLM": - st.header("Esecuzione: Valutazione Automatica con LLM") + # --- Opzioni API basate su Preset --- + add_section_title("Opzioni API basate su Preset", icon="🛠️") - # Pulsante che utilizza la funzione di callback - st.button( - "🚀 Esegui Test con LLM", - key="run_llm_test_btn", - on_click=run_llm_test_callback - ) + preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} + preset_display_names = list(preset_names_to_id.keys()) - # Gestisce l'esecuzione del test - if st.session_state.run_llm_test: - st.session_state.run_llm_test = False # Resetta lo stato - - gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) - eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) - - if not gen_preset_config or not eval_preset_config: - st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") - else: - with st.spinner("Generazione risposte e valutazione LLM in corso..."): - results = {} - for q_id in questions_in_set: - q_data = get_question_data(q_id) - if q_data: - # Genera risposta di esempio usando LLM - generation_output = generate_example_answer_with_llm(q_data['question'], - client_config=gen_preset_config, - show_api_details=show_api_details) - actual_answer = generation_output["answer"] - generation_api_details = generation_output["api_details"] - - if actual_answer is None: - # Gestione errore generazione - results[q_id] = { - 'question': q_data['question'], - 'expected_answer': q_data['expected_answer'], - 'actual_answer': "Errore Generazione", - 'evaluation': {'score': 0, 'explanation': 'Generazione fallita'}, - 'generation_api_details': generation_api_details - # Salva anche se la generazione fallisce - } - continue - - evaluation = evaluate_answer(q_data['question'], q_data['expected_answer'], actual_answer, - client_config=eval_preset_config, - show_api_details=show_api_details) - results[q_id] = { - 'question': q_data['question'], - 'expected_answer': q_data['expected_answer'], - 'actual_answer': actual_answer, - 'evaluation': evaluation, # Questo conterrà i dettagli API della VALUTAZIONE - 'generation_api_details': generation_api_details # Dettagli API della GENERAZIONE - } - - # Salva e visualizza risultati - if results: - avg_score = sum(r['evaluation']['score'] for r in results.values()) / len(results) if results else 0 - result_data = { - 'set_name': 
selected_set['name'], - 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - 'avg_score': avg_score, - 'sample_type': 'Generata da LLM', - 'method': 'LLM', - 'generation_preset': gen_preset_config['name'], - 'evaluation_preset': eval_preset_config['name'], - 'questions': results - } - result_id = add_result(selected_set_id, result_data) - st.session_state.results = load_results() - st.success(f"Test LLM completato! Punteggio medio: {avg_score:.2f}%") + def get_preset_config_by_name(name): + preset_id = preset_names_to_id.get(name) + if preset_id: + return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() + return None + + # Seleziona preset per generazione risposta (comune a entrambe le modalità) + generation_preset_name = st.selectbox( + "Seleziona Preset per Generazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="generation_preset_select", + help="Il preset API utilizzato per generare la risposta alla domanda." + ) + st.session_state.selected_generation_preset_name = generation_preset_name + + # Seleziona preset per valutazione (solo per modalità LLM) + if st.session_state.test_mode == "Valutazione Automatica con LLM": + evaluation_preset_name = st.selectbox( + "Seleziona Preset per Valutazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="evaluation_preset_select", + help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." + ) + st.session_state.selected_evaluation_preset_name = evaluation_preset_name + + show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) + + # --- Logica di Esecuzione Test --- + test_mode_selected = st.session_state.test_mode + + if test_mode_selected == "Valutazione Automatica con LLM": + st.header("Esecuzione: Valutazione Automatica con LLM") + + # Pulsante che utilizza la funzione di callback + st.button( + "🚀 Esegui Test con LLM", + key="run_llm_test_btn", + on_click=run_llm_test_callback + ) + + # Gestisce l'esecuzione del test + if st.session_state.run_llm_test: + st.session_state.run_llm_test = False # Resetta lo stato + + gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) + eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) + + if not gen_preset_config or not eval_preset_config: + st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") + else: + with st.spinner("Generazione risposte e valutazione LLM in corso..."): + exec_result = execute_llm_test( + selected_set_id, + selected_set['name'], + questions_in_set, + gen_preset_config, + eval_preset_config, + show_api_details=show_api_details, + ) + + if exec_result: + st.session_state.results = exec_result['results_df'] + st.success(f"Test LLM completato! Punteggio medio: {exec_result['avg_score']:.2f}%") # Visualizzazione risultati dettagliati st.subheader("Risultati Dettagliati") - for q_id, result in results.items(): + for q_id, result in exec_result['results'].items(): with st.expander( f"Domanda: {result['question'][:50]}..." 
): @@ -236,4 +166,3 @@ def get_preset_config_by_name(name): st.write("**Risposta Generata:**", result['actual_answer']) st.write("**Punteggio:**", f"{result['evaluation']['score']:.1f}%") st.write("**Valutazione:**", result['evaluation']['explanation']) - diff --git a/view/gestione_domande.py b/view/gestione_domande.py index 847b386..8b3eb21 100644 --- a/view/gestione_domande.py +++ b/view/gestione_domande.py @@ -1,57 +1,80 @@ import streamlit as st import pandas as pd -import json from controllers.question_controller import ( - load_questions, add_question, update_question, delete_question, import_questions_from_file, + filter_questions_by_category, ) -from view.style_utils import add_page_header, add_section_title -from view.component_utils import create_card -from view.session_state import ensure_keys +from services.question_service import load_questions, refresh_questions +from view.style_utils import add_page_header +from view.state_models import QuestionPageState # === FUNZIONI DI CALLBACK === -def save_question_callback(question_id, edited_question, edited_answer, edited_category): - """Funzione di callback: salva le modifiche alla domanda""" - if update_question(question_id, domanda=edited_question, risposta_attesa=edited_answer, - categoria=edited_category): - st.session_state.save_success_message = "Domanda aggiornata con successo!" - st.session_state.save_success = True - # Aggiorna le domande in session_state per riflettere la modifica - st.session_state.questions.loc[st.session_state.questions['id'] == question_id, 'categoria'] = edited_category - st.session_state.trigger_rerun = True + +def save_question_action( + question_id, edited_question, edited_answer, edited_category +) -> QuestionPageState: + """Salva le modifiche alla domanda e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + if update_question( + question_id, + domanda=edited_question, + risposta_attesa=edited_answer, + categoria=edited_category, + ): + state.save_success = True + st.session_state.questions = refresh_questions() + state.trigger_rerun = True else: - st.session_state.save_error_message = "Impossibile aggiornare la domanda." - st.session_state.save_error = True + state.save_error = True + return state -def delete_question_callback(question_id): - """Funzione di callback: elimina la domanda""" - delete_question(question_id) - st.session_state.delete_success_message = "Domanda eliminata con successo!" 
- st.session_state.delete_success = True - st.session_state.trigger_rerun = True +def create_save_question_callback( + question_id, edited_question, edited_answer, edited_category +): + def callback(): + st.session_state.question_page_state = save_question_action( + question_id, edited_question, edited_answer, edited_category + ) + return callback + + +def delete_question_action(question_id) -> QuestionPageState: + """Elimina la domanda e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + delete_question(question_id) + state.delete_success = True + st.session_state.questions = refresh_questions() + state.trigger_rerun = True + return state -def import_questions_callback(): - """Funzione di callback: importa le domande""" - if 'uploaded_file_content' in st.session_state and st.session_state.uploaded_file_content is not None: - success, message = import_questions_from_file(st.session_state.uploaded_file_content) +def import_questions_action(uploaded_file) -> QuestionPageState: + """Importa le domande da file e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + if uploaded_file is not None: + success, message = import_questions_from_file(uploaded_file) if success: - st.session_state.import_success_message = message - st.session_state.import_success = True - # Ricarica le domande dal database per aggiornare lo stato - st.session_state.questions = load_questions() - st.session_state.trigger_rerun = True + state.import_success = True + state.import_success_message = message + st.session_state.questions = refresh_questions() + state.trigger_rerun = True else: - st.session_state.import_error_message = message - st.session_state.import_error = True + state.import_error = True + state.import_error_message = message + return state + + +def import_questions_callback(): + uploaded_file = st.session_state.get("uploaded_file_content") + st.session_state.question_page_state = import_questions_action(uploaded_file) # === FUNZIONI DI DIALOGO === @@ -59,7 +82,7 @@ def import_questions_callback(): @st.dialog("Conferma Eliminazione") def confirm_delete_question_dialog(question_id, question_text): """Dialogo di conferma per l'eliminazione della domanda""" - st.write(f"Sei sicuro di voler eliminare questa domanda?") + st.write("Sei sicuro di voler eliminare questa domanda?") st.write(f"**Domanda:** {question_text[:100]}...") st.warning("Questa azione non può essere annullata.") @@ -67,7 +90,7 @@ def confirm_delete_question_dialog(question_id, question_text): with col1: if st.button("Sì, Elimina", type="primary", use_container_width=True): - delete_question_callback(question_id) + st.session_state.question_page_state = delete_question_action(question_id) st.rerun() with col2: @@ -75,218 +98,198 @@ def confirm_delete_question_dialog(question_id, question_text): st.rerun() -# === Inizializzazione delle variabili di stato === -ensure_keys({ - "save_success": False, - "save_error": False, - "delete_success": False, - "add_success": False, - "import_success": False, - "import_error": False, - "trigger_rerun": False, -}) - -# Carica sempre le domande dal database per la visualizzazione -st.session_state.questions = load_questions() - -# Gestisce la logica di rerun -if st.session_state.trigger_rerun: - st.session_state.trigger_rerun = False - st.rerun() - -# Mostra i messaggi di stato -if st.session_state.save_success: - st.success(st.session_state.get('save_success_message', 'Operazione completata con successo!')) - st.session_state.save_success = False - -if 
st.session_state.save_error: - st.error(st.session_state.get('save_error_message', 'Si è verificato un errore.')) - st.session_state.save_error = False - -if st.session_state.delete_success: - st.success(st.session_state.get('delete_success_message', 'Eliminazione completata con successo!')) - st.session_state.delete_success = False - -if st.session_state.add_success: - st.success(st.session_state.get('add_success_message', 'Domanda aggiunta con successo!')) - st.session_state.add_success = False - -if st.session_state.import_success: - st.success(st.session_state.get('import_success_message', 'Importazione completata con successo!')) - st.session_state.import_success = False - -if st.session_state.import_error: - st.error(st.session_state.get('import_error_message', 'Errore durante l\'importazione.')) - st.session_state.import_error = False - -# Aggiungi un'intestazione stilizzata -add_page_header( - "Gestione Domande", - icon="📋", - description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." -) - -# Scheda per diverse funzioni di gestione delle domande -tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) - -# Scheda Visualizza e Modifica Domande -with tabs[0]: - st.header("Visualizza e Modifica Domande") - - if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df = st.session_state.questions - # Assicurati che la colonna 'categoria' esista, altrimenti aggiungila con valori vuoti - if 'categoria' not in questions_df.columns: - questions_df['categoria'] = "" - else: - # Riempi i valori NaN o None nella colonna 'categoria' con una stringa vuota o 'N/A' - # per assicurare che il filtro funzioni correttamente e per la visualizzazione. - questions_df['categoria'] = questions_df['categoria'].fillna('N/A') - - # Ottieni le categorie uniche per il filtro, includendo un'opzione per mostrare tutto - # Converti esplicitamente in stringa per evitare problemi con tipi misti e aggiungi 'Tutte le categorie' - unique_categories = sorted(list(questions_df['categoria'].astype(str).unique())) - unique_categories.insert(0, "Tutte le categorie") - - # Crea il selettore per la categoria - selected_category = st.selectbox( - "Filtra per categoria:", - options=unique_categories, - index=0 # Imposta "Tutte le categorie" come predefinito - ) - - # Filtra il DataFrame in base alla categoria selezionata - if selected_category == "Tutte le categorie": - filtered_questions_df = questions_df - else: - filtered_questions_df = questions_df[questions_df['categoria'] == selected_category] - - if not filtered_questions_df.empty: - for idx, row in filtered_questions_df.iterrows(): - # Usa .get('categoria', 'N/A') per una gestione sicura se 'categoria' non fosse presente o fosse NaN dopo il filtro - # Anche se abbiamo gestito i NaN prima, è una buona pratica per la robustezza. - category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' - with st.expander( - f"Domanda: {row['domanda'][:100]}... 
(Categoria: {category_display})" - ): - col1, col2 = st.columns([3, 1]) - - with col1: - edited_question = st.text_area( - f"Modifica Domanda {idx + 1}", - value=row['domanda'], - key=f"q_edit_{row['id']}" - ) - - edited_answer = st.text_area( - f"Modifica Risposta Attesa {idx + 1}", - value=row['risposta_attesa'], - key=f"a_edit_{row['id']}" - ) - - edited_category_value = row.get('categoria', '') - edited_category = st.text_input( - f"Modifica Categoria {idx + 1}", - value=edited_category_value, - key=f"c_edit_{row['id']}" - ) - - with col2: - # Pulsante Aggiorna con callback - st.button( - "Salva Modifiche", - key=f"save_{row['id']}", - on_click=save_question_callback, - args=(row['id'], edited_question, edited_answer, edited_category) - ) - - # Pulsante Elimina con dialog di conferma - if st.button( - "Elimina Domanda", - key=f"delete_{row['id']}", - type="secondary" - ): - confirm_delete_question_dialog(row['id'], row['domanda']) - else: - st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") - - else: - st.info("Nessuna domanda disponibile. Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") - -# Scheda Aggiungi Domande -with tabs[1]: - st.header("Aggiungi Nuova Domanda") - - with st.form("add_question_form"): - domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") - risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") - categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") - - submitted = st.form_submit_button("Aggiungi Domanda") - - if submitted: - if domanda and risposta_attesa: - # Passa la categoria, che può essere una stringa vuota se non inserita - question_id = add_question(domanda=domanda, risposta_attesa=risposta_attesa, - categoria=categoria) - st.session_state.add_success_message = f"Domanda aggiunta con successo con ID: {question_id}" - st.session_state.add_success = True - st.session_state.trigger_rerun = True - st.rerun() +def render(): + # === Inizializzazione dello stato === + st.session_state.setdefault("question_page_state", QuestionPageState()) + state: QuestionPageState = st.session_state.question_page_state + + # Carica le domande utilizzando la cache + st.session_state.questions = load_questions() + + # Gestisce la logica di rerun + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + # Mostra i messaggi di stato + if state.save_success: + st.success(state.save_success_message) + if state.save_error: + st.error(state.save_error_message) + if state.delete_success: + st.success(state.delete_success_message) + if state.add_success: + st.success(state.add_success_message) + if state.import_success: + st.success(state.import_success_message) + if state.import_error: + st.error(state.import_error_message) + + # Resetta lo stato dopo la visualizzazione dei messaggi + st.session_state.question_page_state = QuestionPageState() + + # Aggiungi un'intestazione stilizzata + add_page_header( + "Gestione Domande", + icon="📋", + description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." 
+ ) + + # Scheda per diverse funzioni di gestione delle domande + tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) + + # Scheda Visualizza e Modifica Domande + with tabs[0]: + st.header("Visualizza e Modifica Domande") + + if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df, unique_categories = filter_questions_by_category() + category_options = ["Tutte le categorie"] + unique_categories + + selected_category = st.selectbox( + "Filtra per categoria:", + options=category_options, + index=0 + ) + + if selected_category == "Tutte le categorie": + filtered_questions_df = questions_df + else: + filtered_questions_df, _ = filter_questions_by_category(selected_category) + + if not filtered_questions_df.empty: + for idx, row in filtered_questions_df.iterrows(): + category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' + with st.expander( + f"Domanda: {row['domanda'][:100]}... (Categoria: {category_display})" + ): + col1, col2 = st.columns([3, 1]) + + with col1: + edited_question = st.text_area( + f"Modifica Domanda {idx + 1}", + value=row['domanda'], + key=f"q_edit_{row['id']}" + ) + + edited_answer = st.text_area( + f"Modifica Risposta Attesa {idx + 1}", + value=row['risposta_attesa'], + key=f"a_edit_{row['id']}" + ) + + edited_category_value = row.get('categoria', '') + edited_category = st.text_input( + f"Modifica Categoria {idx + 1}", + value=edited_category_value, + key=f"c_edit_{row['id']}" + ) + + with col2: + st.button( + "Salva Modifiche", + key=f"save_{row['id']}", + on_click=create_save_question_callback( + row['id'], edited_question, edited_answer, edited_category + ), + ) + + if st.button( + "Elimina Domanda", + key=f"delete_{row['id']}", + type="secondary" + ): + confirm_delete_question_dialog(row['id'], row['domanda']) else: - st.error("Sono necessarie sia la domanda che la risposta attesa.") - -# Scheda Importa da File -with tabs[2]: - st.header("Importa Domande da File") - - st.write(""" - Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). - - ### Formato File: - - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. - (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). - - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. - (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). 
- - ### Esempio CSV: - ```csv - domanda,risposta_attesa,categoria - "Quanto fa 2+2?","4","Matematica Base" - "Qual è la capitale della Francia?","Parigi","Geografia" - "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" - ``` - - ### Esempio JSON: - ```json - [ - { - "domanda": "Quanto fa 2+2?", - "risposta_attesa": "4", - "categoria": "Matematica Base" - }, - { - "domanda": "Qual è la capitale della Francia?", - "risposta_attesa": "Parigi", - "categoria": "Geografia" - }, - { - "domanda": "Chi ha scritto 'Romeo e Giulietta'?", - "risposta_attesa": "William Shakespeare" - } - ] - ``` - """) - - uploaded_file = st.file_uploader("Scegli un file", type=["csv", "json"]) + st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") - if uploaded_file is not None: - # Salva il file in session_state per l'uso da parte della callback - st.session_state.uploaded_file_content = uploaded_file - - # Pulsante che utilizza la funzione di callback - st.button( - "Importa Domande", - key="import_questions_btn", - on_click=import_questions_callback - ) + else: + st.info("Nessuna domanda disponibile. Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") + + # Scheda Aggiungi Domande + with tabs[1]: + st.header("Aggiungi Nuova Domanda") + + with st.form("add_question_form"): + domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") + risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") + categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") + + submitted = st.form_submit_button("Aggiungi Domanda") + + if submitted: + if domanda and risposta_attesa: + # Passa la categoria, che può essere una stringa vuota se non inserita + question_id = add_question( + domanda=domanda, + risposta_attesa=risposta_attesa, + categoria=categoria, + ) + state = QuestionPageState() + state.add_success = True + state.add_success_message = ( + f"Domanda aggiunta con successo con ID: {question_id}" + ) + state.trigger_rerun = True + st.session_state.question_page_state = state + st.session_state.questions = refresh_questions() + st.rerun() + else: + st.error("Sono necessarie sia la domanda che la risposta attesa.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Domande da File") + + st.write(""" + Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). + + ### Formato File: + - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). 
+ + ### Esempio CSV: + ```csv + domanda,risposta_attesa,categoria + "Quanto fa 2+2?","4","Matematica Base" + "Qual è la capitale della Francia?","Parigi","Geografia" + "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" + ``` + + ### Esempio JSON: + ```json + [ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } + ] + ``` + """) + + uploaded_file = st.file_uploader("Scegli un file", type=["csv", "json"]) + + if uploaded_file is not None: + # Salva il file in session_state per l'uso da parte della callback + st.session_state.uploaded_file_content = uploaded_file + + # Pulsante che utilizza la funzione di callback + st.button( + "Importa Domande", + key="import_questions_btn", + on_click=import_questions_callback + ) diff --git a/view/gestione_set.py b/view/gestione_set.py index 772b37f..a8dc264 100644 --- a/view/gestione_set.py +++ b/view/gestione_set.py @@ -1,379 +1,396 @@ import streamlit as st from controllers.question_set_controller import ( - load_sets, create_set, + load_sets, ) -from controllers.question_controller import load_questions -from view.style_utils import add_page_header, add_section_title -from view.component_utils import create_card, create_metrics_container -from view.session_state import ensure_keys +from services.question_service import load_questions +from view.style_utils import add_page_header, add_global_styles +from view.state_models import SetPageState from view.set_helpers import ( - save_set_callback, - delete_set_callback, confirm_delete_set_dialog, import_set_callback, get_question_text, get_question_category, mark_expander_open, create_save_set_callback, - create_delete_set_callback, ) -ensure_keys({ - "save_set_success": False, - "save_set_error": False, - "delete_set_success": False, - "create_set_success": False, - "import_set_success": False, - "import_set_error": False, - "trigger_rerun": False, - "question_checkboxes": {}, - "newly_selected_questions": {}, - "set_expanders": {}, -}) - -if st.session_state.trigger_rerun: - st.session_state.trigger_rerun = False - st.rerun() - -if st.session_state.save_set_success: - st.success(st.session_state.get('save_set_success_message', 'Set aggiornato con successo!')) - st.session_state.save_set_success = False - -if st.session_state.save_set_error: - st.error(st.session_state.get('save_set_error_message', 'Errore durante l\'aggiornamento del set.')) - st.session_state.save_set_error = False - -if st.session_state.delete_set_success: - st.success(st.session_state.get('delete_set_success_message', 'Set eliminato con successo!')) - st.session_state.delete_set_success = False - -if st.session_state.create_set_success: - st.success(st.session_state.get('create_set_success_message', 'Set creato con successo!')) - st.session_state.create_set_success = False - -if st.session_state.import_set_success: - st.success(st.session_state.get('import_set_success_message', 'Importazione completata con successo!')) - st.session_state.import_set_success = False - -if st.session_state.import_set_error: - st.error(st.session_state.get('import_set_error_message', 'Errore durante l\'importazione.')) - st.session_state.import_set_error = False - -# Inizializza sempre i dati caricandoli dal database -st.session_state.questions = load_questions() 
-st.session_state.question_sets = load_sets() - -# Assicurati che esista lo stato degli expander per ogni set -if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: - current_set_ids = st.session_state.question_sets['id'].tolist() - # Rimuovi stati per set non più presenti - for sid in list(st.session_state.set_expanders.keys()): - if sid not in current_set_ids: - del st.session_state.set_expanders[sid] - # Aggiungi stato predefinito per nuovi set - for sid in current_set_ids: - st.session_state.set_expanders.setdefault(sid, False) - -# Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN -if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df_temp = st.session_state.questions - if 'categoria' not in questions_df_temp.columns: - questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante - questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN - st.session_state.questions = questions_df_temp - -# Aggiungi un'intestazione stilizzata -add_page_header( - "Gestione Set di Domande", - icon="📚", - description="Organizza le tue domande in set per test e valutazioni" -) +def render(): + add_global_styles() + + st.session_state.setdefault("set_page_state", SetPageState()) + state: SetPageState = st.session_state.set_page_state + + st.session_state.setdefault("question_checkboxes", {}) + st.session_state.setdefault("newly_selected_questions", {}) + st.session_state.setdefault("set_expanders", {}) + + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + if state.save_set_success: + st.success(state.save_set_success_message) + state.save_set_success = False + + if state.save_set_error: + st.error(state.save_set_error_message) + state.save_set_error = False + + if state.delete_set_success: + st.success(state.delete_set_success_message) + state.delete_set_success = False + + if state.create_set_success: + st.success(state.create_set_success_message) + state.create_set_success = False + + if state.import_set_success: + st.success(state.import_set_success_message) + state.import_set_success = False + + if state.import_set_error: + st.error(state.import_set_error_message) + state.import_set_error = False + + # Inizializza i dati utilizzando la cache + if 'questions' not in st.session_state: + st.session_state.questions = load_questions() + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + # Assicurati che esista lo stato degli expander per ogni set + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + current_set_ids = st.session_state.question_sets['id'].tolist() + # Rimuovi stati per set non più presenti + for sid in list(st.session_state.set_expanders.keys()): + if sid not in current_set_ids: + del st.session_state.set_expanders[sid] + # Aggiungi stato predefinito per nuovi set + for sid in current_set_ids: + st.session_state.set_expanders.setdefault(sid, False) + + # Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN + if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df_temp = st.session_state.questions + if 'categoria' not in questions_df_temp.columns: + questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante + questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN + st.session_state.questions = questions_df_temp + + # Aggiungi un'intestazione stilizzata 
+ add_page_header( + "Gestione Set di Domande", + icon="📚", + description="Organizza le tue domande in set per test e valutazioni" + ) + + # Schede per diverse funzioni di gestione dei set + tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) + + # Scheda Visualizza e Modifica Set + with tabs[0]: + st.header("Visualizza e Modifica Set di Domande") + + questions_ready = ('questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns) + sets_ready = 'question_sets' in st.session_state + + if not questions_ready: + st.warning( + "Dati delle domande (incluse categorie) non completamente caricati. " + "Alcune funzionalità potrebbero essere limitate. Vai a 'Gestione Domande'." + ) + # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti + unique_categories_for_filter = [] + selected_categories = [] + else: + questions_df = st.session_state.questions + # Ottieni categorie uniche per il filtro, escludendo 'N/A' + # se si preferisce non mostrarlo come opzione selezionabile + # o gestendolo specificamente. Per ora, includiamo tutto. + unique_categories_for_filter = sorted( + list(questions_df['categoria'].astype(str).unique()) + ) + if not unique_categories_for_filter: + st.info( + "Nessuna categoria definita nelle domande esistenti per poter filtrare." + ) + + selected_categories = st.multiselect( + "Filtra per categorie (mostra i set che contengono almeno una domanda da " + "OGNI categoria selezionata):", + options=unique_categories_for_filter, + default=[], + key="filter_categories", + ) -# Schede per diverse funzioni di gestione dei set -tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) - - -# Funzione per ottenere il testo della domanda tramite ID - -# Scheda Visualizza e Modifica Set -with tabs[0]: - st.header("Visualizza e Modifica Set di Domande") - - questions_ready = ('questions' in st.session_state and - not st.session_state.questions.empty and - 'domanda' in st.session_state.questions.columns and - 'categoria' in st.session_state.questions.columns) - sets_ready = 'question_sets' in st.session_state - - if not questions_ready: - st.warning( - "Dati delle domande (incluse categorie) non completamente caricati. Alcune funzionalità potrebbero essere limitate. Vai a 'Gestione Domande'.") - # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti - unique_categories_for_filter = [] - selected_categories = [] - else: - questions_df = st.session_state.questions - # Ottieni categorie uniche per il filtro, escludendo 'N/A' se si preferisce non mostrarlo come opzione selezionabile - # o gestendolo specificamente. Per ora, includiamo tutto. 
- unique_categories_for_filter = sorted(list(questions_df['categoria'].astype(str).unique())) - if not unique_categories_for_filter: - st.info("Nessuna categoria definita nelle domande esistenti per poter filtrare.") - - selected_categories = st.multiselect( - "Filtra per categorie (mostra i set che contengono almeno una domanda da OGNI categoria selezionata):", - options=unique_categories_for_filter, - default=[], - key="filter_categories", - ) - - if sets_ready and not st.session_state.question_sets.empty: - question_sets_df = st.session_state.question_sets - display_sets_df = question_sets_df.copy() # Inizia con tutti i set - - if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti - filtered_set_indices = [] - for idx, set_row in question_sets_df.iterrows(): - question_ids_in_set = set_row.get('questions', []) - if not isinstance(question_ids_in_set, list): - question_ids_in_set = [] - - if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro - continue - - categories_present_in_set = set() - for q_id in question_ids_in_set: - category = get_question_category(str(q_id), questions_df) - categories_present_in_set.add(category) - - # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata - if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): - filtered_set_indices.append(idx) - - display_sets_df = question_sets_df.loc[filtered_set_indices] - - if display_sets_df.empty and selected_categories: - st.info( - f"Nessun set trovato che contenga domande da tutte le categorie selezionate: {', '.join(selected_categories)}.") - elif display_sets_df.empty and not selected_categories: + if sets_ready and not st.session_state.question_sets.empty: + question_sets_df = st.session_state.question_sets + display_sets_df = question_sets_df.copy() # Inizia con tutti i set + + if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti + filtered_set_indices = [] + for idx, set_row in question_sets_df.iterrows(): + question_ids_in_set = set_row.get('questions', []) + if not isinstance(question_ids_in_set, list): + question_ids_in_set = [] + + if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro + continue + + categories_present_in_set = set() + for q_id in question_ids_in_set: + category = get_question_category(str(q_id), questions_df) + categories_present_in_set.add(category) + + # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata + if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): + filtered_set_indices.append(idx) + + display_sets_df = question_sets_df.loc[filtered_set_indices] + + if display_sets_df.empty and selected_categories: + st.info( + "Nessun set trovato che contenga domande da tutte le categorie selezionate: " + f"{', '.join(selected_categories)}." + ) + elif display_sets_df.empty and not selected_categories: + st.info( + "Nessun set di domande disponibile. Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'." 
+ ) + + for idx, row in display_sets_df.iterrows(): + exp_key = f"set_expander_{row['id']}" + if exp_key not in st.session_state.set_expanders: + st.session_state.set_expanders[exp_key] = False + + with st.expander( + f"Set: {row['name']}", + expanded=st.session_state.set_expanders.get(exp_key, False), + ): + col1, col2 = st.columns([3, 1]) + + with col1: + _ = st.text_input( + "Nome Set", + value=row['name'], + key=f"set_name_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + + st.subheader("Domande in questo Set") + current_question_ids_in_set = row.get('questions', []) + if not isinstance(current_question_ids_in_set, list): + current_question_ids_in_set = [] + + if row['id'] not in st.session_state.question_checkboxes: + st.session_state.question_checkboxes[row['id']] = {} + + if current_question_ids_in_set: + for q_id in current_question_ids_in_set: + q_text = get_question_text(str(q_id)) + q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' + display_text = f"{q_text} (Categoria: {q_cat})" + + # 使用回调来更新checkbox状态 + checkbox_value = st.checkbox( + display_text, + value=True, + key=f"qcheck_{row['id']}_{q_id}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value + else: + st.info("Nessuna domanda in questo set.") + + st.subheader("Aggiungi Domande al Set") + + # 初始化新选择的问题状态 + if row['id'] not in st.session_state.newly_selected_questions: + st.session_state.newly_selected_questions[row['id']] = [] + + if questions_ready: + all_questions_df = st.session_state.questions + available_questions_df = all_questions_df[ + ~all_questions_df['id'].astype(str).isin( + [str(q_id) for q_id in current_question_ids_in_set] + ) + ] + + if not available_questions_df.empty: + question_dict_for_multiselect = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" + for q_id, q_text in zip( + available_questions_df['id'].astype(str), + available_questions_df['domanda'], + ) + } + newly_selected_questions_ids = st.multiselect( + "Seleziona domande da aggiungere", + options=list(question_dict_for_multiselect.keys()), + format_func=lambda x: question_dict_for_multiselect.get(x, x), + key=f"add_q_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids + else: + st.info("Nessuna altra domanda disponibile da aggiungere.") + else: + st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") + + with col2: + st.button( + "Salva Modifiche", + key=f"save_set_{row['id']}", + on_click=create_save_set_callback(row['id'], exp_key, state) + ) + + # Pulsante Elimina con dialog di conferma + if st.button( + "Elimina Set", + key=f"delete_set_{row['id']}", + type="secondary" + ): + mark_expander_open(exp_key) + confirm_delete_set_dialog(row['id'], row['name'], state) + + # Lo stato dell'expander viene aggiornato tramite i callback + + elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") - for idx, row in display_sets_df.iterrows(): - exp_key = f"set_expander_{row['id']}" - if exp_key not in st.session_state.set_expanders: - st.session_state.set_expanders[exp_key] = False - - with st.expander( - f"Set: {row['name']}", - expanded=st.session_state.set_expanders.get(exp_key, False), - ): - col1, col2 = st.columns([3, 1]) - - with col1: - edited_name = st.text_input( - f"Nome Set", - value=row['name'], - key=f"set_name_{row['id']}", - on_change=mark_expander_open, - args=(exp_key,) - ) + # Scheda Crea Nuovo Set + with tabs[1]: + st.header("Crea Nuovo Set di Domande") - st.subheader("Domande in questo Set") - current_question_ids_in_set = row.get('questions', []) - if not isinstance(current_question_ids_in_set, list): - current_question_ids_in_set = [] - - if row['id'] not in st.session_state.question_checkboxes: - st.session_state.question_checkboxes[row['id']] = {} - - if current_question_ids_in_set: - for q_id in current_question_ids_in_set: - q_text = get_question_text(str(q_id)) - q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' - display_text = f"{q_text} (Categoria: {q_cat})" - - # 使用回调来更新checkbox状态 - checkbox_value = st.checkbox( - display_text, - value=True, - key=f"qcheck_{row['id']}_{q_id}", - on_change=mark_expander_open, - args=(exp_key,) - ) - st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value - else: - st.info("Nessuna domanda in questo set.") - - st.subheader("Aggiungi Domande al Set") - - # 初始化新选择的问题状态 - if row['id'] not in st.session_state.newly_selected_questions: - st.session_state.newly_selected_questions[row['id']] = [] - - if questions_ready: - all_questions_df = st.session_state.questions - available_questions_df = all_questions_df[ - ~all_questions_df['id'].astype(str).isin( - [str(q_id) for q_id in current_question_ids_in_set]) - ] - - if not available_questions_df.empty: - question_dict_for_multiselect = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" for q_id, q_text in - zip(available_questions_df['id'].astype(str), available_questions_df['domanda']) - } - newly_selected_questions_ids = st.multiselect( - "Seleziona domande da aggiungere", - options=list(question_dict_for_multiselect.keys()), - format_func=lambda x: question_dict_for_multiselect.get(x, x), - key=f"add_q_{row['id']}", - on_change=mark_expander_open, - args=(exp_key,) - ) - st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids - else: - st.info("Nessuna altra domanda disponibile da aggiungere.") - else: - st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") - - with col2: - st.button( - "Salva Modifiche", - key=f"save_set_{row['id']}", - on_click=create_save_set_callback(row['id'], exp_key) - ) + with st.form("create_set_form"): + set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") - # Pulsante Elimina con dialog di conferma - if st.button( - "Elimina Set", - key=f"delete_set_{row['id']}", - type="secondary" - ): - mark_expander_open(exp_key) - confirm_delete_set_dialog(row['id'], row['name']) - - # Lo stato dell'expander viene aggiornato tramite i callback - - elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): - st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") - -# Scheda Crea Nuovo Set -with tabs[1]: - st.header("Crea Nuovo Set di Domande") - - with st.form("create_set_form"): - set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") - - selected_qs_for_new_set = [] - questions_ready_for_creation = ('questions' in st.session_state and - not st.session_state.questions.empty and - 'domanda' in st.session_state.questions.columns and - 'categoria' in st.session_state.questions.columns) - - if questions_ready_for_creation: - all_questions_df_creation = st.session_state.questions - question_dict_for_creation = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" for q_id, q_text in - zip(all_questions_df_creation['id'].astype(str), all_questions_df_creation['domanda']) - } - - selected_qs_for_new_set = st.multiselect( - "Seleziona domande per questo set", - options=list(question_dict_for_creation.keys()), - format_func=lambda x: question_dict_for_creation.get(x, x), - key="create_set_questions", + selected_qs_for_new_set = [] + questions_ready_for_creation = ( + 'questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns ) - else: - st.info( - "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). Vai a 'Gestione Domande' per aggiungere/caricare domande.") - - submitted = st.form_submit_button("Crea Set") - - if submitted: - if set_name: - set_id = create_set(set_name, [str(q_id) for q_id in selected_qs_for_new_set]) - st.session_state.create_set_success_message = f"Set di domande creato con successo con ID: {set_id}" - st.session_state.create_set_success = True - st.session_state.trigger_rerun = True - st.rerun() - else: - st.error("Il nome del set è obbligatorio.") - -# Scheda Importa da File -with tabs[2]: - st.header("Importa Set da File") - - st.write(""" - Carica un file JSON o CSV contenente uno o più set di domande. - - ### Formato File JSON per Set Multipli: - ```json - [ - { - "name": "Capitali", - "questions": [ - { - "id": "1", - "domanda": "Qual è la capitale della Francia?", - "risposta_attesa": "Parigi", - "categoria": "Geografia" - }, - { - "id": "2", - "domanda": "Qual è la capitale della Germania?", - "risposta_attesa": "Berlino", - "categoria": "Geografia" - } - ] - }, - { - "name": "Matematica Base", - "questions": [ - { - "id": "3", - "domanda": "Quanto fa 2+2?", - "risposta_attesa": "4", - "categoria": "Matematica" - }, - { - "id": "4", - "domanda": "Quanto fa 10*4?", - "risposta_attesa": "40", - "categoria": "Matematica" + + if questions_ready_for_creation: + all_questions_df_creation = st.session_state.questions + question_dict_for_creation = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" + for q_id, q_text in zip( + all_questions_df_creation['id'].astype(str), + all_questions_df_creation['domanda'], + ) } - ] - } - ] - ``` - - ### Formato CSV: - Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` - (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e - ``categoria``. 
- ```csv - name,id,domanda,risposta_attesa,categoria - Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia - Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia - Matematica Base,3,Quanto fa 2+2?,4,Matematica - Matematica Base,4,Quanto fa 10*4?,40,Matematica - ``` - - ### Note Importanti: - - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente - - Se un set con lo stesso nome esiste già, verrà saltato - - Solo le domande nuove verranno aggiunte al database - - Le domande esistenti verranno referenziate nei nuovi set - """) - - uploaded_file = st.file_uploader("Scegli un file", type=["json", "csv"]) - - if uploaded_file is not None: - st.session_state.uploaded_file_content_set = uploaded_file - st.button( - "Importa Set", - key="import_set_btn", - on_click=import_set_callback - ) + selected_qs_for_new_set = st.multiselect( + "Seleziona domande per questo set", + options=list(question_dict_for_creation.keys()), + format_func=lambda x: question_dict_for_creation.get(x, x), + key="create_set_questions", + ) + else: + st.info( + "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). ", + "Vai a 'Gestione Domande' per aggiungere/caricare domande." + ) + + submitted = st.form_submit_button("Crea Set") + if submitted: + if set_name: + set_id = create_set( + set_name, [str(q_id) for q_id in selected_qs_for_new_set] + ) + state.create_set_success_message = ( + f"Set di domande creato con successo con ID: {set_id}" + ) + state.create_set_success = True + state.trigger_rerun = True + st.rerun() + else: + st.error("Il nome del set è obbligatorio.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Set da File") + + st.write(""" + Carica un file JSON o CSV contenente uno o più set di domande. + + ### Formato File JSON per Set Multipli: + ```json + [ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } + ] + ``` + + ### Formato CSV: + Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` + (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e + ``categoria``. 
+ ```csv + name,id,domanda,risposta_attesa,categoria + Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia + Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia + Matematica Base,3,Quanto fa 2+2?,4,Matematica + Matematica Base,4,Quanto fa 10*4?,40,Matematica + ``` + + ### Note Importanti: + - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente + - Se un set con lo stesso nome esiste già, verrà saltato + - Solo le domande nuove verranno aggiunte al database + - Le domande esistenti verranno referenziate nei nuovi set + """) + + uploaded_file = st.file_uploader("Scegli un file", type=["json", "csv"]) + + if uploaded_file is not None: + st.session_state.uploaded_file_content_set = uploaded_file + st.button( + "Importa Set", + key="import_set_btn", + on_click=lambda: import_set_callback(state) + ) diff --git a/view/home.py b/view/home.py new file mode 100644 index 0000000..5538c4d --- /dev/null +++ b/view/home.py @@ -0,0 +1,96 @@ +"""Home page view module for the Streamlit application.""" + +import streamlit as st +from .style_utils import add_home_styles + + +def render(): + """Visualizza la pagina principale con le funzionalità della piattaforma.""" + + add_home_styles() + + st.markdown( + """ +
+        🧠 Piattaforma di Valutazione LLM
+        Una piattaforma completa per valutare le risposte LLM con diversi provider AI
+""",
+        unsafe_allow_html=True,
+    )
+
+    # Box delle funzionalità con icone e stile migliorato
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown(
+            """
+            📋 Gestione delle Domande
+            Crea, modifica e organizza le tue domande di test con le risposte previste.
+            Costruisci set di test completi per valutare le risposte LLM in modo efficiente.
+
+            🔌 Supporto Multi-Provider API
+            Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello.
+            Configura parametri API e verifica le connessioni con feedback in tempo reale.
+            """,
+            unsafe_allow_html=True,
+        )
+
+    with col2:
+        st.markdown(
+            """
+            🧪 Valutazione Automatizzata
+            Esegui test con punteggio automatico rispetto alle risposte previste.
+            Valuta la somiglianza semantica tra testi con modelli linguistici.
+
+            📊 Analisi Avanzata
+            Visualizza i risultati dei test con grafici interattivi e metriche dettagliate.
+            Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici.
+            """,
+            unsafe_allow_html=True,
+        )
+
+    st.markdown(
+        """
+        🚀 Iniziare
+        1. Configura le tue credenziali API nella pagina Configurazione API
+        2. Crea domande e risposte previste nella pagina Gestione Domande
+        3. Organizza le domande in set nella pagina Gestione Set di Domande
+        4. Esegui valutazioni nella pagina Esecuzione Test
+        5. Visualizza e analizza i risultati nella pagina Visualizzazione Risultati
+
+        Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.
+""", + unsafe_allow_html=True, + ) diff --git a/view/session_state.py b/view/session_state.py index 0301621..6e99fe8 100644 --- a/view/session_state.py +++ b/view/session_state.py @@ -1,12 +1,30 @@ import streamlit as st +from controllers.startup_controller import get_initial_state + def ensure_keys(defaults: dict) -> None: """Garantisce la presenza delle chiavi in ``st.session_state``. - Args: + Parametri: defaults: Dizionario con chiavi e valori da impostare se mancanti. """ for key, value in defaults.items(): st.session_state.setdefault(key, value) + +def initialize_session_state() -> None: + """Inizializza ``st.session_state`` con i valori di default.""" + required_keys = [ + "questions", + "question_sets", + "results", + "api_key", + "endpoint", + "model", + "temperature", + "max_tokens", + ] + if any(key not in st.session_state for key in required_keys): + defaults = get_initial_state() + ensure_keys(defaults) diff --git a/view/set_helpers.py b/view/set_helpers.py index 3d6686e..4cbf8ce 100644 --- a/view/set_helpers.py +++ b/view/set_helpers.py @@ -1,31 +1,44 @@ import streamlit as st -from controllers.question_set_controller import update_set, delete_set, import_sets_from_file -from controllers.question_controller import load_questions - - -def save_set_callback(set_id: str, edited_name: str, question_options_checkboxes: dict, newly_selected_questions_ids: list[str]): +from controllers.question_set_controller import ( + update_set, + delete_set, + import_sets_from_file, + refresh_question_sets, +) +from services.question_service import refresh_questions +from .state_models import SetPageState + + +def save_set_callback( + set_id: str, + edited_name: str, + question_options_checkboxes: dict, + newly_selected_questions_ids: list[str], + state: SetPageState, +) -> None: kept_questions_ids = [q_id for q_id, keep in question_options_checkboxes.items() if keep] - updated_questions_ids = list(set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids])) + updated_questions_ids = list( + set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids]) + ) - if update_set(set_id, edited_name, updated_questions_ids): - st.session_state.save_set_success_message = "Set di domande aggiornato con successo!" - st.session_state.save_set_success = True - st.session_state.trigger_rerun = True - else: - st.session_state.save_set_error_message = "Impossibile aggiornare il set di domande." - st.session_state.save_set_error = True + update_set(set_id, edited_name, updated_questions_ids) + state.save_set_success_message = "Set di domande aggiornato con successo!" + state.save_set_success = True + st.session_state.question_sets = refresh_question_sets() + state.trigger_rerun = True -def delete_set_callback(set_id: str): +def delete_set_callback(set_id: str, state: SetPageState): delete_set(set_id) - st.session_state.delete_set_success_message = "Set di domande eliminato con successo!" - st.session_state.delete_set_success = True - st.session_state.trigger_rerun = True + state.delete_set_success_message = "Set di domande eliminato con successo!" 
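+    # Segnala l'esito dell'eliminazione, aggiorna la cache dei set e pianifica il rerun della pagina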
+ state.delete_set_success = True + st.session_state.question_sets = refresh_question_sets() + state.trigger_rerun = True @st.dialog("Conferma Eliminazione") -def confirm_delete_set_dialog(set_id: str, set_name: str): +def confirm_delete_set_dialog(set_id: str, set_name: str, state: SetPageState): """Dialog di conferma per l'eliminazione del set di domande""" st.write(f"Sei sicuro di voler eliminare il set '{set_name}'?") st.warning("Questa azione non può essere annullata.") @@ -34,7 +47,7 @@ def confirm_delete_set_dialog(set_id: str, set_name: str): with col1: if st.button("Sì, Elimina", type="primary", use_container_width=True): - delete_set_callback(set_id) + delete_set_callback(set_id, state) st.rerun() with col2: @@ -42,40 +55,38 @@ def confirm_delete_set_dialog(set_id: str, set_name: str): st.rerun() -def import_set_callback(): +def import_set_callback(state: SetPageState): """Importa uno o più set di domande da file JSON o CSV.""" - st.session_state.import_set_success = False - st.session_state.import_set_error = False - st.session_state.import_set_success_message = "" - st.session_state.import_set_error_message = "" + state.import_set_success = False + state.import_set_error = False + state.import_set_success_message = "" + state.import_set_error_message = "" uploaded_file = st.session_state.get("uploaded_file_content_set") result = import_sets_from_file(uploaded_file) if result["success"]: - st.session_state.import_set_success = True - st.session_state.import_set_success_message = result["success_message"] - if result.get("questions_df") is not None: - st.session_state.questions = result["questions_df"] - if result.get("sets_df") is not None: - st.session_state.question_sets = result["sets_df"] + state.import_set_success = True + state.import_set_success_message = result["success_message"] + st.session_state.questions = refresh_questions() + st.session_state.question_sets = refresh_question_sets() st.session_state.uploaded_file_content_set = None else: - st.session_state.import_set_error = True - st.session_state.import_set_error_message = result["error_message"] + state.import_set_error = True + state.import_set_error_message = result["error_message"] for warn in result.get("warnings", []): st.warning(warn) - st.session_state.trigger_rerun = True + state.trigger_rerun = True def get_question_text(question_id: str) -> str: """Ritorna il testo della domanda dato il suo ID.""" if "questions" in st.session_state and not st.session_state.questions.empty: if "domanda" not in st.session_state.questions.columns: - st.session_state.questions = load_questions() + st.session_state.questions = refresh_questions() if "domanda" not in st.session_state.questions.columns: return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" @@ -100,20 +111,26 @@ def mark_expander_open(exp_key: str): st.session_state.set_expanders[exp_key] = True -def create_save_set_callback(set_id: str, exp_key: str): +def create_save_set_callback(set_id: str, exp_key: str, state: SetPageState): def callback(): mark_expander_open(exp_key) edited_name = st.session_state.get(f"set_name_{set_id}", "") question_options_checkboxes = st.session_state.question_checkboxes.get(set_id, {}) newly_selected_questions_ids = st.session_state.newly_selected_questions.get(set_id, []) - save_set_callback(set_id, edited_name, question_options_checkboxes, newly_selected_questions_ids) + save_set_callback( + set_id, + edited_name, + question_options_checkboxes, + newly_selected_questions_ids, + state, + ) return callback -def 
create_delete_set_callback(set_id: str): +def create_delete_set_callback(set_id: str, state: SetPageState): def callback(): - delete_set_callback(set_id) + delete_set_callback(set_id, state) return callback diff --git a/view/state_models.py b/view/state_models.py new file mode 100644 index 0000000..4bceddf --- /dev/null +++ b/view/state_models.py @@ -0,0 +1,47 @@ +from dataclasses import dataclass + + +@dataclass +class SetPageState: + """Transient UI state for the question set management page.""" + + save_set_success: bool = False + save_set_success_message: str = "Set aggiornato con successo!" + save_set_error: bool = False + save_set_error_message: str = "Errore durante l'aggiornamento del set." + + delete_set_success: bool = False + delete_set_success_message: str = "Set eliminato con successo!" + + create_set_success: bool = False + create_set_success_message: str = "Set creato con successo!" + + import_set_success: bool = False + import_set_success_message: str = "Importazione completata con successo!" + import_set_error: bool = False + import_set_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False + + +@dataclass +class QuestionPageState: + """Transient UI state for the question management page.""" + + save_success: bool = False + save_success_message: str = "Domanda aggiornata con successo!" + save_error: bool = False + save_error_message: str = "Impossibile aggiornare la domanda." + + delete_success: bool = False + delete_success_message: str = "Domanda eliminata con successo!" + + add_success: bool = False + add_success_message: str = "Domanda aggiunta con successo!" + + import_success: bool = False + import_success_message: str = "Importazione completata con successo!" + import_error: bool = False + import_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False diff --git a/view/style_utils.py b/view/style_utils.py index 032f2bd..0a78ba5 100644 --- a/view/style_utils.py +++ b/view/style_utils.py @@ -1,3 +1,8 @@ +"""Funzioni di utilità per applicare stili CSS nelle viste Streamlit. + +Centralizza l'iniezione di CSS per favorirne il riuso tra le pagine. 
+""" + import streamlit as st @@ -47,12 +52,28 @@ def add_global_styles(): border-radius: 8px !important; width: 100% !important; } + .stMultiselect span[data-baseweb="tag"] { + max-width: 100%; + white-space: normal !important; + flex-wrap: wrap; + overflow-wrap: anywhere; /* optional safeguard for long tokens */ + } + .stMultiselect span[data-baseweb="tag"] span { + white-space: normal !important; + word-break: break-word; + } + .stMultiselect .st-gz { + max-width: none !important; + } /* Consenti testo a capo all'interno di tutti i menu select */ div[data-baseweb="select"] * { - white-space: normal !important; + max-width: 100%; + overflow-wrap: anywhere; + word-break: normal; } div[data-baseweb="menu"] * { white-space: normal !important; + word-break: break-word; } /* Stile dei pulsanti */ @@ -79,7 +100,8 @@ def add_global_styles(): color: #333333 !important; } - .stCheckbox > div[role="radiogroup"] > label > div:first-child, .stRadio > div[role="radiogroup"] > label > div:first-child { + .stCheckbox > div[role="radiogroup"] > label > div:first-child, + .stRadio > div[role="radiogroup"] > label > div:first-child { background-color: white !important; border-color: #C0C9F1 !important; } @@ -185,3 +207,166 @@ def add_section_title(title: str, icon: str | None = None): unsafe_allow_html=True, ) + +def add_home_styles(): + """Applica gli stili CSS specifici della home page. + + Migliora la visibilità degli input nei temi chiaro e scuro e definisce + l'aspetto degli elementi principali come box funzionali e sezioni di + benvenuto. + """ + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) diff --git a/view/visualizza_risultati.py b/view/visualizza_risultati.py index d1c18ff..96424f1 100644 --- a/view/visualizza_risultati.py +++ b/view/visualizza_risultati.py @@ -5,483 +5,461 @@ import plotly.graph_objects as go from controllers.test_controller import ( - load_results, import_results_from_file, + load_results, + refresh_results, + calculate_statistics, ) from controllers.question_set_controller import load_sets from controllers.api_preset_controller import load_presets from view.style_utils import add_page_header, add_section_title -from view.component_utils import create_card, create_metrics_container - -add_page_header( - "Visualizzazione Risultati Test", - icon="📊", - description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." -) - -# Carica sempre i risultati direttamente dal database -st.session_state.results = load_results() -if st.session_state.results.empty: - st.warning("Nessun risultato di test disponibile. 
Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") - st.stop() - -# Carica sempre i set di domande dal database -st.session_state.question_sets = load_sets() - -# Carica sempre i preset API dal database -st.session_state.api_presets = load_presets() - -# Stato per messaggi di importazione risultati -if 'import_results_success' not in st.session_state: - st.session_state.import_results_success = False -if 'import_results_error' not in st.session_state: - st.session_state.import_results_error = False -if 'import_results_message' not in st.session_state: - st.session_state.import_results_message = "" - -if st.session_state.import_results_success: - st.success(st.session_state.import_results_message) - st.session_state.import_results_success = False -if st.session_state.import_results_error: - st.error(st.session_state.import_results_message) - st.session_state.import_results_error = False - -def get_set_name(set_id): - if not st.session_state.question_sets.empty: - set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] - if not set_info.empty: - return set_info.iloc[0]['name'] - return "Set Sconosciuto" - -def get_model_from_preset_name(preset_name): - """Restituisce il modello associato a un preset, se disponibile.""" - if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: - preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] - if not preset_row.empty: - return preset_row.iloc[0]['model'] - return "Sconosciuto" - -def import_results_callback(): - """Callback per importare risultati da file JSON.""" - if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: - success, message = import_results_from_file(st.session_state.uploaded_results_file) - st.session_state.import_results_message = message - st.session_state.import_results_success = success - st.session_state.import_results_error = not success - if success: - st.session_state.results = load_results() - st.session_state.uploaded_results_file = None - -# Filtri per Set e Modello LLM -all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) -all_model_names = sorted({get_model_from_preset_name(r['results'].get('generation_preset')) for _, r in st.session_state.results.iterrows()}) - -selected_set_filter = st.selectbox( - "Filtra per Set", - options=["Tutti"] + all_set_names, - index=0, - key="filter_set_name" -) - -selected_model_filter = st.selectbox( - "Filtra per Modello LLM", - options=["Tutti"] + all_model_names, - index=0, - key="filter_model_name" -) -filtered_results_df = st.session_state.results -if selected_set_filter != "Tutti": - set_ids = st.session_state.question_sets[st.session_state.question_sets['name'] == selected_set_filter]['id'].astype(str) - filtered_results_df = filtered_results_df[filtered_results_df['set_id'].astype(str).isin(set_ids)] -if selected_model_filter != "Tutti": - filtered_results_df = filtered_results_df[ - filtered_results_df.apply( - lambda row: get_model_from_preset_name(row['results'].get('generation_preset')) == selected_model_filter, - axis=1 - ) - ] - -# Elabora i risultati per la visualizzazione nel selectbox -processed_results_for_select = [] -for _, row in filtered_results_df.iterrows(): - result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli - set_name = get_set_name(row['set_id']) - avg_score = result_data.get('avg_score', 0) - method = 
result_data.get('method', 'N/A') - method_icon = "🤖" if method == "LLM" else "📊" - - processed_results_for_select.append({ - 'id': row['id'], - 'display_name': f"{row['timestamp']} - {method_icon} {set_name} (Avg: {avg_score:.2f}%) - {method}" +def render(): + add_page_header( + "Visualizzazione Risultati Test", + icon="📊", + description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." + ) + + # Carica i risultati utilizzando la cache + if 'results' not in st.session_state: + st.session_state.results = load_results() + if st.session_state.results.empty: + st.warning("Nessun risultato di test disponibile. Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") + st.stop() + + # Carica i set di domande utilizzando la cache + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + # Carica i preset API utilizzando la cache + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + + # Stato per messaggi di importazione risultati + if 'import_results_success' not in st.session_state: + st.session_state.import_results_success = False + if 'import_results_error' not in st.session_state: + st.session_state.import_results_error = False + if 'import_results_message' not in st.session_state: + st.session_state.import_results_message = "" + + if st.session_state.import_results_success: + st.success(st.session_state.import_results_message) + st.session_state.import_results_success = False + if st.session_state.import_results_error: + st.error(st.session_state.import_results_message) + st.session_state.import_results_error = False + + def get_set_name(set_id): + if not st.session_state.question_sets.empty: + set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] + if not set_info.empty: + return set_info.iloc[0]['name'] + return "Set Sconosciuto" + + def get_model_from_preset_name(preset_name): + """Restituisce il modello associato a un preset, se disponibile.""" + if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: + preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] + if not preset_row.empty: + return preset_row.iloc[0]['model'] + return "Sconosciuto" + + def import_results_callback(): + """Callback per importare risultati da file JSON.""" + if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: + success, message = import_results_from_file(st.session_state.uploaded_results_file) + st.session_state.import_results_message = message + st.session_state.import_results_success = success + st.session_state.import_results_error = not success + if success: + st.session_state.results = refresh_results() + st.session_state.uploaded_results_file = None + + # Filtri per Set e Modello LLM + all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) + + all_model_names = sorted({ + r['results']['generation_llm'] + for _, r in st.session_state.results.iterrows() + if r['results'].get('generation_llm') }) -processed_results_for_select.sort(key=lambda x: x['display_name'].split(' - ')[0], reverse=True) # Ordina per timestamp - -result_options = {r['id']: r['display_name'] for r in processed_results_for_select} + selected_set_filter = st.selectbox( + "Filtra per Set", + options=["Tutti"] + all_set_names, + index=0, + key="filter_set_name" + ) + + selected_model_filter = st.selectbox( + "Filtra per 
Modello LLM", + options=["Tutti"] + all_model_names, + index=0, + key="filter_model_name" + ) + + filtered_results_df = st.session_state.results + if selected_set_filter != "Tutti": + set_ids = st.session_state.question_sets[ + st.session_state.question_sets['name'] == selected_set_filter + ]['id'].astype(str) + filtered_results_df = filtered_results_df[ + filtered_results_df['set_id'].astype(str).isin(set_ids) + ] + + if selected_model_filter != "Tutti": + filtered_results_df = filtered_results_df[ + filtered_results_df['results'].apply( + lambda res: res.get('generation_llm') == selected_model_filter + ) + ] + + # Elabora i risultati per la visualizzazione nel selectbox + processed_results_for_select = [] + for _, row in filtered_results_df.iterrows(): + result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli + set_name = get_set_name(row['set_id']) + avg_score = result_data.get('avg_score', 0) + method = result_data.get('method', 'N/A') + method_icon = "🤖" if method == "LLM" else "📊" + + processed_results_for_select.append( + { + 'id': row['id'], + 'display_name': ( + f"{row['timestamp']} - {method_icon} {set_name} " + f"(Avg: {avg_score:.2f}%) - {method}" + ), + } + ) -# Seleziona il risultato da visualizzare -selected_result_id = st.selectbox( - "Seleziona un Risultato del Test da Visualizzare", - options=list(result_options.keys()), - format_func=lambda x: result_options[x], - index=0 if result_options else None, - key="select_test_result_to_view" -) + processed_results_for_select.sort( + key=lambda x: x['display_name'].split(' - ')[0], + reverse=True, + ) # Ordina per timestamp + + result_options = {r['id']: r['display_name'] for r in processed_results_for_select} + + # Seleziona il risultato da visualizzare + selected_result_id = st.selectbox( + "Seleziona un Risultato del Test da Visualizzare", + options=list(result_options.keys()), + format_func=lambda x: result_options[x], + index=0 if result_options else None, + key="select_test_result_to_view" + ) + + # Opzionalmente seleziona un secondo risultato per il confronto + # Rimuove l'opzione del risultato attualmente selezionato per evitare di confrontare il test con se stesso + compare_options = [rid for rid in result_options.keys() if rid != selected_result_id] + compare_result_id = st.selectbox( + "Confronta con un altro risultato (opzionale)", + options=[None] + compare_options, + format_func=lambda x: "Nessun confronto" if x is None else result_options[x], + index=0, + key="select_test_result_compare" + ) + if not selected_result_id: + st.info("Nessun risultato selezionato o disponibile.") + st.stop() + + # Ottieni i dati del risultato selezionato + selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] + result_data = selected_result_row['results'] + set_name = get_set_name(selected_result_row['set_id']) + questions_results = result_data.get('questions', {}) + + with st.expander("Esporta/Importa Risultati"): + col_exp, col_imp = st.columns(2) + with col_exp: + selected_json = json.dumps({ + 'id': selected_result_row['id'], + 'set_id': selected_result_row['set_id'], + 'timestamp': selected_result_row['timestamp'], + 'results': result_data + }, indent=2) + st.download_button( + "Export Risultato Selezionato", + selected_json, + file_name=f"result_{selected_result_row['id']}.json", + mime="application/json" + ) -# Opzionalmente seleziona un secondo risultato per il confronto -# Rimuove l'opzione del risultato attualmente selezionato per evitare di 
confrontare il test con se stesso -compare_options = [rid for rid in result_options.keys() if rid != selected_result_id] -compare_result_id = st.selectbox( - "Confronta con un altro risultato (opzionale)", - options=[None] + compare_options, - format_func=lambda x: "Nessun confronto" if x is None else result_options[x], - index=0, - key="select_test_result_compare" -) -if not selected_result_id: - st.info("Nessun risultato selezionato o disponibile.") - st.stop() - -# Ottieni i dati del risultato selezionato -selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] -result_data = selected_result_row['results'] -set_name = get_set_name(selected_result_row['set_id']) -questions_results = result_data.get('questions', {}) - -with st.expander("Esporta/Importa Risultati"): - col_exp, col_imp = st.columns(2) - with col_exp: - selected_json = json.dumps({ - 'id': selected_result_row['id'], - 'set_id': selected_result_row['set_id'], - 'timestamp': selected_result_row['timestamp'], - 'results': result_data - }, indent=2) - st.download_button( - "Export Risultato Selezionato", - selected_json, - file_name=f"result_{selected_result_row['id']}.json", - mime="application/json" - ) + all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2) + st.download_button( + "Export Tutti i Risultati", + all_json, + file_name="all_results.json", + mime="application/json" + ) - all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2) - st.download_button( - "Export Tutti i Risultati", - all_json, - file_name="all_results.json", - mime="application/json" - ) + with col_imp: + uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results") + if uploaded_file is not None: + st.session_state.uploaded_results_file = uploaded_file + st.button( + "Importa Risultati", + on_click=import_results_callback, + key="import_results_btn" + ) - with col_imp: - uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results") - if uploaded_file is not None: - st.session_state.uploaded_results_file = uploaded_file - st.button( - "Importa Risultati", - on_click=import_results_callback, - key="import_results_btn" - ) + # Carica eventuale risultato di confronto + compare_result_row = None + compare_result_data = None + compare_questions_results = {} + if compare_result_id: + compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] + compare_result_data = compare_result_row['results'] + compare_questions_results = compare_result_data.get('questions', {}) + + # Visualizza informazioni generali sul risultato + evaluation_method = result_data.get('method', 'LLM') + method_icon = "🤖" if evaluation_method == "LLM" else "📊" + method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto" + + add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄") + st.markdown(f"**ID Risultato:** `{selected_result_id}`") + st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}") + st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**") + + if 'generation_llm' in result_data: + st.markdown(f"**LLM Generazione Risposte:** `{result_data['generation_llm']}`") + elif 'generation_preset' in result_data: + st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") + if evaluation_method == "LLM": + if 'evaluation_llm' in result_data: + 
st.markdown(f"**LLM Valutazione Risposte:** `{result_data['evaluation_llm']}`") + elif 'evaluation_preset' in result_data: + st.markdown( + f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`" + ) -# Carica eventuale risultato di confronto -compare_result_row = None -compare_result_data = None -compare_questions_results = {} -compare_set_name = "" -if compare_result_id: - compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] - compare_result_data = compare_result_row['results'] - compare_questions_results = compare_result_data.get('questions', {}) - compare_set_name = get_set_name(compare_result_row['set_id']) - -# Visualizza informazioni generali sul risultato -evaluation_method = result_data.get('method', 'LLM') -method_icon = "🤖" if evaluation_method == "LLM" else "📊" -method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto" - -add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄") -st.markdown(f"**ID Risultato:** `{selected_result_id}`") -st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}") -st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**") - -if 'generation_preset' in result_data: - st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") -if evaluation_method == "LLM" and 'evaluation_preset' in result_data: - st.markdown(f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`") - - -# Metriche Generali del Test -add_section_title("Metriche Generali del Test", icon="📈") - -if questions_results: - avg_score_overall = result_data.get('avg_score', 0) - num_questions = len(questions_results) - - cols_metrics = st.columns(2) - with cols_metrics[0]: - st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%") - with cols_metrics[1]: - st.metric("Numero di Domande Valutate", num_questions) + # Metriche Generali del Test + add_section_title("Metriche Generali del Test", icon="📈") + + if questions_results: + stats = calculate_statistics(questions_results) + avg_score_overall = stats["avg_score"] + num_questions = len(stats["per_question_scores"]) + + cols_metrics = st.columns(2) + with cols_metrics[0]: + st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%") + with cols_metrics[1]: + st.metric("Numero di Domande Valutate", num_questions) + + compare_stats = None + if compare_result_row is not None: + compare_stats = calculate_statistics(compare_questions_results) + compare_avg = compare_stats["avg_score"] + diff_avg = compare_avg - avg_score_overall + st.markdown("### Confronto") + cols_cmp = st.columns(3) + cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%") + cols_cmp[1].metric("Punteggio Confronto", f"{compare_avg:.2f}%") + cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%") + + scores_data = [] + for item in stats["per_question_scores"]: + label = item["question"] + label = label[:50] + "..." if len(label) > 50 else label + scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Selezionato"}) + if compare_stats: + for item in compare_stats["per_question_scores"]: + label = item["question"] + label = label[:50] + "..." 
if len(label) > 50 else label + scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Confronto"}) + + if scores_data: + df_scores = pd.DataFrame(scores_data) + unique_questions = len({d['Domanda'] for d in scores_data}) + fig = px.bar( + df_scores, + x='Domanda', + y='Punteggio', + color='Tipo', + barmode='group', + title="Punteggi per Domanda", + height=max(400, unique_questions * 30), + ) + fig.update_layout(yaxis_range=[0, 100]) + st.plotly_chart(fig, use_container_width=True) - if compare_result_row is not None: - compare_avg = compare_result_data.get('avg_score', 0) - diff_avg = compare_avg - avg_score_overall - st.markdown("### Confronto") - cols_cmp = st.columns(3) - cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%") - cols_cmp[1].metric(f"Punteggio Confronto", f"{compare_avg:.2f}%") - cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%") - - # Grafico a barre dei punteggi per domanda (mostra anche il risultato di confronto se presente) - scores_data = [] - all_q_ids = set(questions_results.keys()) - if compare_result_row is not None: - all_q_ids |= set(compare_questions_results.keys()) - - for i, q_id in enumerate(all_q_ids): - q1 = questions_results.get(q_id) - q2 = compare_questions_results.get(q_id) - label = "" - if q1: - label = q1.get('question', f'Domanda {i}') - elif q2: - label = q2.get('question', f'Domanda {i}') - label = label[:50] + "..." if len(label) > 50 else label - - if q1: - scores_data.append({'Domanda': label, 'Punteggio': q1.get('evaluation', {}).get('score', 0), 'Tipo': 'Selezionato'}) - if q2: - scores_data.append({'Domanda': label, 'Punteggio': q2.get('evaluation', {}).get('score', 0), 'Tipo': 'Confronto'}) - - if scores_data: - df_scores = pd.DataFrame(scores_data) - fig = px.bar(df_scores, x='Domanda', y='Punteggio', color='Tipo', barmode='group', - title="Punteggi per Domanda", height=max(400, len(all_q_ids) * 30)) - fig.update_layout(yaxis_range=[0, 100]) - st.plotly_chart(fig, use_container_width=True) - - # Grafico aggiuntivo solo per la modalità LLM if evaluation_method == "LLM": - # Raccogliamo i dati di Somiglianza, Correttezza e Completezza per ogni domanda - radar_data = [] - metrics_sum = {'similarity': 0, 'correctness': 0, 'completeness': 0} - count = 0 - - for q_id, q_data in questions_results.items(): - evaluation = q_data.get('evaluation', {}) - question_text = q_data.get('question', f'Domanda {q_id}') - # Utilizziamo i primi 20 caratteri della domanda come etichetta - question_label = question_text[:20] + "..." 
if len(question_text) > 20 else question_text - - # Raccogliamo i dati per il grafico radar individuale - similarity = evaluation.get('similarity', 0) - correctness = evaluation.get('correctness', 0) - completeness = evaluation.get('completeness', 0) - - radar_data.append({ - 'Domanda': question_label, - 'Somiglianza': similarity, - 'Correttezza': correctness, - 'Completezza': completeness - }) - - # Sommiamo per calcolare le medie - metrics_sum['similarity'] += similarity - metrics_sum['correctness'] += correctness - metrics_sum['completeness'] += completeness - count += 1 - - # Calcoliamo le medie per il risultato selezionato - avg_metrics = { - 'similarity': metrics_sum['similarity'] / count if count > 0 else 0, - 'correctness': metrics_sum['correctness'] / count if count > 0 else 0, - 'completeness': metrics_sum['completeness'] / count if count > 0 else 0 - } - - # Se esiste un risultato di confronto calcoliamo anche le sue medie - avg_metrics_cmp = None - if compare_result_row is not None and compare_questions_results: - cmp_sum = {'similarity': 0, 'correctness': 0, 'completeness': 0} - cmp_count = 0 - for q_cmp in compare_questions_results.values(): - eval_cmp = q_cmp.get('evaluation', {}) - cmp_sum['similarity'] += eval_cmp.get('similarity', 0) - cmp_sum['correctness'] += eval_cmp.get('correctness', 0) - cmp_sum['completeness'] += eval_cmp.get('completeness', 0) - cmp_count += 1 - avg_metrics_cmp = { - 'similarity': cmp_sum['similarity'] / cmp_count if cmp_count > 0 else 0, - 'correctness': cmp_sum['correctness'] / cmp_count if cmp_count > 0 else 0, - 'completeness': cmp_sum['completeness'] / cmp_count if cmp_count > 0 else 0 - } - - # Creiamo un DataFrame con i dati - df_radar = pd.DataFrame(radar_data) - - # Prima mostriamo il radar chart per ogni domanda categories = ['Somiglianza', 'Correttezza', 'Completezza'] - - # Creiamo il grafico radar fig_radar = go.Figure() - - # Aggiungiamo una traccia per ogni domanda del risultato selezionato - for i, row in df_radar.iterrows(): - fig_radar.add_trace(go.Scatterpolar( - r=[row['Somiglianza'], row['Correttezza'], row['Completezza']], + rm = stats["radar_metrics"] + fig_radar.add_trace( + go.Scatterpolar( + r=[rm['similarity'], rm['correctness'], rm['completeness']], theta=categories, fill='toself', - name=row['Domanda'] - )) - - # Traccia media risultato selezionato - fig_radar.add_trace(go.Scatterpolar( - r=[avg_metrics['similarity'], avg_metrics['correctness'], avg_metrics['completeness']], - theta=categories, - fill='toself', - name='Media', - line=dict(color='red', width=3) - )) - - # Traccia media confronto, se disponibile - if avg_metrics_cmp is not None: - fig_radar.add_trace(go.Scatterpolar( - r=[avg_metrics_cmp['similarity'], avg_metrics_cmp['correctness'], avg_metrics_cmp['completeness']], - theta=categories, - fill='toself', - name='Media Confronto', - line=dict(color='green', width=3, dash='dash') - )) - - # Configuriamo il layout del grafico radar - fig_radar.update_layout( - title="Grafico Radar delle Metriche LLM per ogni domanda", - polar=dict( - radialaxis=dict( - visible=True, - range=[0, 100] + name='Selezionato', + ) + ) + if compare_stats: + crm = compare_stats["radar_metrics"] + fig_radar.add_trace( + go.Scatterpolar( + r=[crm['similarity'], crm['correctness'], crm['completeness']], + theta=categories, + fill='toself', + name='Confronto', ) - ), + ) + fig_radar.update_layout( + title="Grafico Radar delle Metriche LLM", + polar=dict(radialaxis=dict(visible=True, range=[0, 100])), showlegend=True, legend=dict( 
orientation="h", yanchor="bottom", y=-0.2, xanchor="center", - x=0.5 + x=0.5, ), - height=600 + height=600, ) - - # Mostriamo il grafico radar st.plotly_chart(fig_radar, use_container_width=True) - # Mostriamo anche i valori medi in un blocco di metriche per maggiore chiarezza st.subheader("Valori medi delle metriche") cols = st.columns(3) - cols[0].metric("Somiglianza", f"{avg_metrics['similarity']:.2f}%") - cols[1].metric("Correttezza", f"{avg_metrics['correctness']:.2f}%") - cols[2].metric("Completezza", f"{avg_metrics['completeness']:.2f}%") + cols[0].metric("Somiglianza", f"{rm['similarity']:.2f}%") + cols[1].metric("Correttezza", f"{rm['correctness']:.2f}%") + cols[2].metric("Completezza", f"{rm['completeness']:.2f}%") - if avg_metrics_cmp is not None: + if compare_stats: cols_cmp = st.columns(3) - cols_cmp[0].metric("Somiglianza (Confronto)", f"{avg_metrics_cmp['similarity']:.2f}%") - cols_cmp[1].metric("Correttezza (Confronto)", f"{avg_metrics_cmp['correctness']:.2f}%") - cols_cmp[2].metric("Completezza (Confronto)", f"{avg_metrics_cmp['completeness']:.2f}%") -else: - st.info("Nessun dettaglio per le domande disponibile in questo risultato.") - -if compare_result_row is not None: - add_section_title("Confronto Dettagliato per Domanda", icon="🔍") - comparison_rows = [] - all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys()) - for qid in all_q_ids: - q1 = questions_results.get(qid, {}) - q2 = compare_questions_results.get(qid, {}) - label = q1.get('question') or q2.get('question') or str(qid) - score1 = q1.get('evaluation', {}).get('score', None) - score2 = q2.get('evaluation', {}).get('score', None) - delta = None - if score1 is not None and score2 is not None: - delta = score2 - score1 - comparison_rows.append({ - 'Domanda': label[:50] + ('...' if len(label) > 50 else ''), - 'Selezionato': score1, - 'Confronto': score2, - 'Delta': delta - }) - if comparison_rows: - df_comp = pd.DataFrame(comparison_rows) - st.dataframe(df_comp) - -# Dettagli per ogni domanda -add_section_title("Risultati Dettagliati per Domanda", icon="📝") -if not questions_results: - st.info("Nessuna domanda trovata in questo set di risultati.") -else: - for q_id, q_data in questions_results.items(): - question_text = q_data.get('question', "Testo domanda non disponibile") - expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile") - actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile") - - with st.expander( - f"Domanda: {question_text[:100]}..." 
- ): - st.markdown(f"**Domanda:** {question_text}") - st.markdown(f"**Risposta Attesa:** {expected_answer}") - st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") - st.divider() - - # Mostra Dettagli API di Generazione (se presenti e richiesti) - generation_api_details = q_data.get('generation_api_details') - if generation_api_details and isinstance(generation_api_details, dict): - with st.container(): - st.markdown("###### Dettagli Chiamata API di Generazione Risposta") - if generation_api_details.get('request'): - st.caption("Richiesta API Generazione:") - st.json(generation_api_details['request'], expanded=False) - if generation_api_details.get('response_content'): - st.caption("Contenuto Risposta API Generazione:") - # Prova a formattare se è una stringa JSON, altrimenti mostra com'è - try: - response_data_gen = json.loads(generation_api_details['response_content']) if isinstance(generation_api_details['response_content'], str) else generation_api_details['response_content'] - st.code(json.dumps(response_data_gen, indent=2), language="json") - except: - st.text(generation_api_details['response_content']) - if generation_api_details.get('error'): - st.caption("Errore API Generazione:") - st.error(generation_api_details['error']) + cols_cmp[0].metric("Somiglianza (Confronto)", f"{crm['similarity']:.2f}%") + cols_cmp[1].metric("Correttezza (Confronto)", f"{crm['correctness']:.2f}%") + cols_cmp[2].metric("Completezza (Confronto)", f"{crm['completeness']:.2f}%") + else: + st.info("Nessun dettaglio per le domande disponibile in questo risultato.") + + if compare_result_row is not None: + add_section_title("Confronto Dettagliato per Domanda", icon="🔍") + comparison_rows = [] + all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys()) + for qid in all_q_ids: + q1 = questions_results.get(qid, {}) + q2 = compare_questions_results.get(qid, {}) + label = q1.get('question') or q2.get('question') or str(qid) + score1 = q1.get('evaluation', {}).get('score', None) + score2 = q2.get('evaluation', {}).get('score', None) + delta = None + if score1 is not None and score2 is not None: + delta = score2 - score1 + comparison_rows.append({ + 'Domanda': label[:50] + ('...' if len(label) > 50 else ''), + 'Selezionato': score1, + 'Confronto': score2, + 'Delta': delta + }) + if comparison_rows: + df_comp = pd.DataFrame(comparison_rows) + st.dataframe(df_comp) + + # Dettagli per ogni domanda + add_section_title("Risultati Dettagliati per Domanda", icon="📝") + if not questions_results: + st.info("Nessuna domanda trovata in questo set di risultati.") + else: + for q_id, q_data in questions_results.items(): + question_text = q_data.get('question', "Testo domanda non disponibile") + expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile") + actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile") + + with st.expander( + f"Domanda: {question_text[:100]}..." 
+ ): + st.markdown(f"**Domanda:** {question_text}") + st.markdown(f"**Risposta Attesa:** {expected_answer}") + st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") st.divider() - - if evaluation_method == "LLM": - evaluation = q_data.get('evaluation', {}) # Assicurati che evaluation sia sempre un dizionario - st.markdown(f"##### Valutazione LLM") - score = evaluation.get('score', 0) - explanation = evaluation.get('explanation', "Nessuna spiegazione.") - similarity = evaluation.get('similarity', 0) - correctness = evaluation.get('correctness', 0) - completeness = evaluation.get('completeness', 0) - - st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") - st.markdown(f"**Spiegazione:** {explanation}") - - cols_eval_metrics = st.columns(3) - cols_eval_metrics[0].metric("Somiglianza", f"{similarity:.2f}%") - cols_eval_metrics[1].metric("Correttezza", f"{correctness:.2f}%") - cols_eval_metrics[2].metric("Completezza", f"{completeness:.2f}%") - - api_details = evaluation.get('api_details') - if api_details and isinstance(api_details, dict): - with st.container(): # Sostituisce l'expander interno - st.markdown("###### Dettagli Chiamata API di Valutazione") - if api_details.get('request'): - st.caption("Richiesta API:") - st.json(api_details['request'], expanded=False) - if api_details.get('response_content'): - st.caption("Contenuto Risposta API:") - st.code(json.dumps(json.loads(api_details['response_content']), indent=2) if isinstance(api_details['response_content'], str) else json.dumps(api_details['response_content'], indent=2), language="json") - if api_details.get('error'): - st.caption("Errore API:") - st.error(api_details['error']) - - st.markdown("--- --- ---") + + # Mostra Dettagli API di Generazione (se presenti e richiesti) + generation_api_details = q_data.get('generation_api_details') + if generation_api_details and isinstance(generation_api_details, dict): + with st.container(): + st.markdown("###### Dettagli Chiamata API di Generazione Risposta") + if generation_api_details.get('request'): + st.caption("Richiesta API Generazione:") + st.json( + generation_api_details['request'], expanded=False + ) + if generation_api_details.get('response_content'): + st.caption("Contenuto Risposta API Generazione:") + # Prova a formattare se è una stringa JSON, altrimenti mostra com'è + try: + content = generation_api_details['response_content'] + if isinstance(content, str): + response_data_gen = json.loads(content) + else: + response_data_gen = content + st.code( + json.dumps(response_data_gen, indent=2), + language="json", + ) + except Exception: + st.text( + generation_api_details['response_content'] + ) + if generation_api_details.get('error'): + st.caption("Errore API Generazione:") + st.error(generation_api_details['error']) + st.divider() + + if evaluation_method == "LLM": + evaluation = q_data.get( + 'evaluation', {} + ) # Assicurati che evaluation sia sempre un dizionario + st.markdown("##### Valutazione LLM") + score = evaluation.get('score', 0) + explanation = evaluation.get( + 'explanation', "Nessuna spiegazione." 
+ ) + similarity = evaluation.get('similarity', 0) + correctness = evaluation.get('correctness', 0) + completeness = evaluation.get('completeness', 0) + + st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") + st.markdown(f"**Spiegazione:** {explanation}") + + cols_eval_metrics = st.columns(3) + cols_eval_metrics[0].metric( + "Somiglianza", f"{similarity:.2f}%" + ) + cols_eval_metrics[1].metric( + "Correttezza", f"{correctness:.2f}%" + ) + cols_eval_metrics[2].metric( + "Completezza", f"{completeness:.2f}%" + ) + + api_details = evaluation.get('api_details') + if api_details and isinstance(api_details, dict): + with st.container(): # Sostituisce l'expander interno + st.markdown( + "###### Dettagli Chiamata API di Valutazione" + ) + if api_details.get('request'): + st.caption("Richiesta API:") + st.json(api_details['request'], expanded=False) + if api_details.get('response_content'): + st.caption("Contenuto Risposta API:") + content = api_details['response_content'] + parsed = json.loads(content) if isinstance( + content, str + ) else content + st.code( + json.dumps(parsed, indent=2), + language="json", + ) + if api_details.get('error'): + st.caption("Errore API:") + st.error(api_details['error']) + + st.markdown("--- --- ---") From c9a49b3037871a5589f2e97e4a2380109546e799 Mon Sep 17 00:00:00 2001 From: oniichan Date: Sun, 3 Aug 2025 18:26:35 +0200 Subject: [PATCH 03/41] changed project structure --- app.py | 6 +- controllers/__init__.py | 88 ++++++ controllers/api_preset_controller.py | 69 ++++- controllers/question_controller.py | 210 +++++++++----- controllers/question_set_controller.py | 241 ++++++++++++++-- controllers/startup_controller.py | 18 +- controllers/test_controller.py | 385 +++++++++++++++++++++---- models/api_preset.py | 36 +-- models/question.py | 21 +- models/question_set.py | 20 +- models/test_result.py | 23 +- services/__init__.py | 0 services/cache.py | 47 --- services/evaluation_service.py | 147 ---------- services/openai_service.py | 93 ------ services/question_service.py | 50 ---- services/question_set_importer.py | 256 ---------------- tests/test_question_controller.py | 68 ++++- tests/test_question_set_controller.py | 49 ++-- tests/test_question_set_importer.py | 51 +++- tests/test_statistics.py | 2 +- 21 files changed, 1008 insertions(+), 872 deletions(-) delete mode 100644 services/__init__.py delete mode 100644 services/cache.py delete mode 100644 services/evaluation_service.py delete mode 100644 services/openai_service.py delete mode 100644 services/question_service.py delete mode 100644 services/question_set_importer.py diff --git a/app.py b/app.py index 6145674..13bf145 100644 --- a/app.py +++ b/app.py @@ -2,7 +2,7 @@ import streamlit as st -from view import ( +from views import ( api_configurazione, esecuzione_test, gestione_domande, @@ -10,8 +10,8 @@ home, visualizza_risultati, ) -from view.session_state import initialize_session_state -from view.style_utils import add_global_styles +from views.session_state import initialize_session_state +from views.style_utils import add_global_styles from logging_config import setup_logging setup_logging() diff --git a/controllers/__init__.py b/controllers/__init__.py index e69de29..76f64ef 100644 --- a/controllers/__init__.py +++ b/controllers/__init__.py @@ -0,0 +1,88 @@ +"""Expose controller utilities for external use.""" + +# API preset management +from .api_preset_controller import ( + load_presets, + refresh_api_presets, + list_presets, + get_preset_by_id, + validate_preset, + save_preset, + delete_preset, + 
test_api_connection, +) + +# Question CRUD +from .question_controller import ( + load_questions, + refresh_questions, + add_question, + update_question, + delete_question, + filter_questions_by_category, + import_questions_from_file, +) + +# Question set management +from .question_set_controller import ( + load_sets, + refresh_question_sets, + create_set, + update_set, + delete_set, + import_sets_from_file, +) + +# Results and evaluation utilities +from .test_controller import ( + load_results, + refresh_results, + add_result, + save_results, + import_results_from_file, + calculate_statistics, + evaluate_answer, + execute_llm_test, +) + +# Import helpers +from .startup_controller import get_initial_state + + +__all__ = [ + # API preset + "load_presets", + "refresh_api_presets", + "list_presets", + "get_preset_by_id", + "validate_preset", + "save_preset", + "delete_preset", + "test_api_connection", + # Questions + "load_questions", + "refresh_questions", + "add_question", + "update_question", + "delete_question", + "filter_questions_by_category", + "import_questions_from_file", + # Question sets + "load_sets", + "refresh_question_sets", + "create_set", + "update_set", + "delete_set", + "import_sets_from_file", + # Test results + "load_results", + "refresh_results", + "add_result", + "save_results", + "import_results_from_file", + "calculate_statistics", + "evaluate_answer", + "execute_llm_test", + # Startup + "get_initial_state", +] diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py index c77a741..b23b9a9 100644 --- a/controllers/api_preset_controller.py +++ b/controllers/api_preset_controller.py @@ -1,4 +1,4 @@ -"""Business logic per la gestione dei preset API.""" +"""Utility per la gestione dei preset API.""" import uuid from typing import List, Optional, Tuple @@ -6,21 +6,13 @@ import pandas as pd from models.api_preset import APIPreset -from services.cache import ( +from utils.cache import ( get_api_presets as _get_api_presets, refresh_api_presets as _refresh_api_presets, ) -from controllers.startup_controller import ( - get_default_api_settings as _startup_get_default_api_settings, -) -DEFAULT_API_SETTINGS = _startup_get_default_api_settings() -DEFAULT_MODEL = DEFAULT_API_SETTINGS["model"] -DEFAULT_ENDPOINT = DEFAULT_API_SETTINGS["endpoint"] - +from openai import APIConnectionError, APIStatusError, RateLimitError -def get_default_api_settings() -> dict: - """Restituisce l'endpoint e il modello API predefiniti.""" - return DEFAULT_API_SETTINGS.copy() +from . 
import openai_client def load_presets() -> pd.DataFrame: @@ -40,7 +32,9 @@ def list_presets(df: pd.DataFrame | None = None) -> List[dict]: return df.to_dict(orient="records") -def get_preset_by_id(preset_id: str, df: pd.DataFrame | None = None) -> Optional[dict]: +def get_preset_by_id( + preset_id: str, df: pd.DataFrame | None = None +) -> Optional[dict]: """Recupera un singolo preset dato il suo ID.""" if df is None: df = load_presets() @@ -64,7 +58,9 @@ def validate_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, return True, "" -def save_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, str, pd.DataFrame]: +def save_preset( + data: dict, preset_id: Optional[str] = None +) -> Tuple[bool, str, pd.DataFrame]: """Salva un nuovo preset o aggiorna uno esistente.""" is_valid, message = validate_preset(data, preset_id) if not is_valid: @@ -107,3 +103,48 @@ def delete_preset(preset_id: str) -> Tuple[bool, str, pd.DataFrame]: APIPreset.delete(preset_id) updated_df = refresh_api_presets() return True, f"Preset '{preset_name}' eliminato.", updated_df + + +def test_api_connection( + api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int +) -> Tuple[bool, str]: + """Testa la connessione all'API LLM con i parametri forniti.""" + + client = openai_client.get_openai_client(api_key=api_key, base_url=endpoint) + if not client: + return False, "Client API non inizializzato. Controlla chiave API e endpoint." + + try: + response = client.chat.completions.create( + model=model, + messages=[ + { + "role": "user", + "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'", + } + ], + temperature=temperature, + max_tokens=max_tokens, + ) + content = response.choices[0].message.content or "" + if "Connessione riuscita." in content: + return True, "Connessione API riuscita!" + return ( + False, + "Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): " + f"{content[:200]}...", + ) + except APIConnectionError as e: + return False, f"Errore di connessione API: {e}" + except RateLimitError as e: + return False, f"Errore di Rate Limit API: {e}" + except APIStatusError as e: + return ( + False, + "Errore di stato API (es. 
modello '{model}' non valido per l'endpoint '{endpoint}', " + f"autenticazione fallita, quota superata): {e.status_code} - {e.message}", + ) + except Exception as exc: # noqa: BLE001 + return False, ( + f"Errore imprevisto durante il test della connessione: {type(exc).__name__} - {exc}" + ) diff --git a/controllers/question_controller.py b/controllers/question_controller.py index 3ffc676..f2edfbd 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,37 +1,53 @@ -import os +"""Controller per la gestione delle domande senza layer di service.""" +from typing import Optional, Tuple, List + import json +import os import uuid -from typing import Optional, Tuple, List import pandas as pd from models.question import Question -from services.question_service import load_questions, refresh_questions +from utils.cache import ( + get_questions as _get_questions, + refresh_questions as _refresh_questions, +) -def filter_questions_by_category(category: Optional[str] = None) -> Tuple[pd.DataFrame, List[str]]: - """Ritorna le domande filtrate per categoria e la lista delle categorie disponibili.""" - df = load_questions() +def load_questions() -> pd.DataFrame: + """Restituisce tutte le domande utilizzando la cache.""" + return _get_questions() - if df.empty: - return df, [] - if "categoria" not in df.columns: - df["categoria"] = "" - else: - df["categoria"] = df["categoria"].fillna("N/A") +def refresh_questions() -> pd.DataFrame: + """Svuota e ricarica la cache delle domande.""" + return _refresh_questions() - categories = sorted(list(df["categoria"].astype(str).unique())) - if category: - filtered_df = df[df["categoria"] == category] - else: - filtered_df = df +def add_question_if_not_exists( + question_id: str, + domanda: str, + risposta_attesa: str, + categoria: str = "", +) -> bool: + """Aggiunge una domanda solo se non esiste già.""" - return filtered_df, categories + df = load_questions() + if str(question_id) in df["id"].astype(str).values: + return False + + Question.add(domanda, risposta_attesa, categoria, question_id) + refresh_questions() + return True -def add_question(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: +def add_question( + domanda: str, + risposta_attesa: str, + categoria: str = "", + question_id: Optional[str] = None, +) -> str: + """Aggiunge una nuova domanda e aggiorna la cache.""" qid = Question.add(domanda, risposta_attesa, categoria, question_id) refresh_questions() return qid @@ -43,72 +59,120 @@ def update_question( risposta_attesa: Optional[str] = None, categoria: Optional[str] = None, ) -> bool: - """Aggiorna una domanda e restituisce l'esito dell'operazione.""" + """Aggiorna una domanda esistente e ricarica la cache.""" updated = Question.update(question_id, domanda, risposta_attesa, categoria) refresh_questions() return updated def delete_question(question_id: str) -> None: + """Elimina una domanda e aggiorna la cache.""" Question.delete(question_id) refresh_questions() +def filter_questions_by_category( + category: Optional[str] = None, +) -> Tuple[pd.DataFrame, List[str]]: + """Restituisce le domande filtrate per categoria e tutte le categorie.""" + + df = load_questions() + + if df.empty: + return df, [] + + if "categoria" not in df.columns: + df["categoria"] = "" + else: + df["categoria"] = df["categoria"].fillna("N/A") + + categories = sorted(list(df["categoria"].astype(str).unique())) + + if category: + filtered_df = df[df["categoria"] == category] + else: + filtered_df = 
df + + return filtered_df, categories + + def import_questions_from_file(file) -> Tuple[bool, str]: """Importa domande da un file CSV o JSON.""" - try: - file_extension = os.path.splitext(file.name)[1].lower() - imported_df = None - - if file_extension == '.csv': - imported_df = pd.read_csv(file) - elif file_extension == '.json': - data = json.load(file) - if isinstance(data, list): - imported_df = pd.DataFrame(data) - elif isinstance(data, dict) and 'questions' in data and isinstance(data['questions'], list): - imported_df = pd.DataFrame(data['questions']) + + def _import(file) -> Tuple[bool, str]: + try: + file_extension = os.path.splitext(file.name)[1].lower() + imported_df = None + + if file_extension == ".csv": + imported_df = pd.read_csv(file) + elif file_extension == ".json": + data = json.load(file) + if isinstance(data, list): + imported_df = pd.DataFrame(data) + elif ( + isinstance(data, dict) + and "questions" in data + and isinstance(data["questions"], list) + ): + imported_df = pd.DataFrame(data["questions"]) + else: + return False, ( + "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." + ) + else: + return False, "Formato file non supportato. Caricare un file CSV o JSON." + + if imported_df is None or imported_df.empty: + return False, "Il file importato è vuoto o non contiene dati validi." + + if "question" in imported_df.columns and "domanda" not in imported_df.columns: + imported_df.rename(columns={"question": "domanda"}, inplace=True) + if ( + "expected_answer" in imported_df.columns + and "risposta_attesa" not in imported_df.columns + ): + imported_df.rename( + columns={"expected_answer": "risposta_attesa"}, inplace=True + ) + + required_columns = ["domanda", "risposta_attesa"] + if not all(col in imported_df.columns for col in required_columns): + return ( + False, + f"Il file importato deve contenere le colonne '{required_columns[0]}' " + f"e '{required_columns[1]}'.", + ) + + if "id" not in imported_df.columns: + imported_df["id"] = [str(uuid.uuid4()) for _ in range(len(imported_df))] else: - return False, "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." - else: - return False, "Formato file non supportato. Caricare un file CSV o JSON." - - if imported_df is None or imported_df.empty: - return False, "Il file importato è vuoto o non contiene dati validi." 
- - if 'question' in imported_df.columns and 'domanda' not in imported_df.columns: - imported_df.rename(columns={'question': 'domanda'}, inplace=True) - if 'expected_answer' in imported_df.columns and 'risposta_attesa' not in imported_df.columns: - imported_df.rename(columns={'expected_answer': 'risposta_attesa'}, inplace=True) - - required_columns = ['domanda', 'risposta_attesa'] - if not all(col in imported_df.columns for col in required_columns): - return ( - False, - f"Il file importato deve contenere le colonne '{required_columns[0]}' " - f"e '{required_columns[1]}'.", + imported_df["id"] = imported_df["id"].astype(str) + + if "categoria" not in imported_df.columns: + imported_df["categoria"] = "" + else: + imported_df["categoria"] = imported_df["categoria"].astype(str).fillna("") + + imported_df["domanda"] = imported_df["domanda"].astype(str).fillna("") + imported_df["risposta_attesa"] = ( + imported_df["risposta_attesa"].astype(str).fillna("") ) - if 'id' not in imported_df.columns: - imported_df['id'] = [str(uuid.uuid4()) for _ in range(len(imported_df))] - else: - imported_df['id'] = imported_df['id'].astype(str) - - if 'categoria' not in imported_df.columns: - imported_df['categoria'] = "" - else: - imported_df['categoria'] = imported_df['categoria'].astype(str).fillna("") - - imported_df['domanda'] = imported_df['domanda'].astype(str).fillna("") - imported_df['risposta_attesa'] = imported_df['risposta_attesa'].astype(str).fillna("") - - final_imported_df = imported_df[['id', 'domanda', 'risposta_attesa', 'categoria']] - - added_count = 0 - for _, row in final_imported_df.iterrows(): - Question.add(row['domanda'], row['risposta_attesa'], row['categoria'], question_id=row['id']) - added_count += 1 - refresh_questions() - return True, f"Importate con successo {added_count} domande." - except Exception as e: - return False, f"Errore durante l'importazione delle domande: {str(e)}" + final_imported_df = imported_df[["id", "domanda", "risposta_attesa", "categoria"]] + + added_count = 0 + for _, row in final_imported_df.iterrows(): + Question.add( + row["domanda"], + row["risposta_attesa"], + row["categoria"], + question_id=row["id"], + ) + added_count += 1 + refresh_questions() + return True, f"Importate con successo {added_count} domande." 
+ except Exception as e: # pragma: no cover - errors should be rare and simple + return False, f"Errore durante l'importazione delle domande: {str(e)}" + + return _import(file) diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 0d364d2..9a266fb 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -1,14 +1,18 @@ -from typing import List, Optional, Any, Dict -import pandas as pd -import json import os +import json +from typing import List, Optional, Any, Dict, Tuple + +import pandas as pd + +from .question_controller import add_question_if_not_exists, load_questions from models.question_set import QuestionSet -from services.question_service import load_questions -from services.cache import ( +from utils.cache import ( get_question_sets as _get_question_sets, refresh_question_sets as _refresh_question_sets, ) -from services.question_set_importer import parse_input, persist_sets + + +REQUIRED_CSV_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] def load_sets() -> pd.DataFrame: @@ -22,19 +26,222 @@ def refresh_question_sets() -> pd.DataFrame: def create_set(name: str, question_ids: Optional[List[str]] = None) -> str: + """Crea un nuovo set di domande e aggiorna la cache.""" set_id = QuestionSet.create(name, question_ids) - _refresh_question_sets() + refresh_question_sets() return set_id -def update_set(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: +def update_set( + set_id: str, + name: Optional[str] = None, + question_ids: Optional[List[str]] = None, +) -> None: + """Aggiorna un set di domande esistente e ricarica la cache.""" QuestionSet.update(set_id, name, question_ids) - _refresh_question_sets() + refresh_question_sets() def delete_set(set_id: str) -> None: + """Elimina un set di domande e aggiorna la cache.""" QuestionSet.delete(set_id) - _refresh_question_sets() + refresh_question_sets() + + +def parse_input(uploaded_file) -> List[Dict[str, Any]]: + """Analizza un file CSV o JSON in una lista di dizionari di set di domande.""" + file_extension = os.path.splitext(uploaded_file.name)[1].lower() + + if file_extension == ".csv": + df = pd.read_csv(uploaded_file) + missing = [c for c in REQUIRED_CSV_COLUMNS if c not in df.columns] + if missing: + raise ValueError( + "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_CSV_COLUMNS) + ) + + sets_dict: Dict[str, List[Dict[str, str]]] = {} + for _, row in df.iterrows(): + name = str(row["name"]).strip() + if not name: + continue + question = { + "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", + "domanda": str(row["domanda"]).strip() + if not pd.isna(row["domanda"]) + else "", + "risposta_attesa": str(row["risposta_attesa"]).strip() + if not pd.isna(row["risposta_attesa"]) + else "", + "categoria": str(row["categoria"]).strip() + if not pd.isna(row["categoria"]) + else "", + } + sets_dict.setdefault(name, []).append(question) + return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] + + string_data = uploaded_file.getvalue().decode("utf-8") + data = json.loads(string_data) + if not isinstance(data, list): + raise ValueError( + "Formato JSON non valido. Il file deve contenere una lista (array) di set." 
+ ) + return data + + +def resolve_question_ids( + questions_in_set_data: List[Any], + current_questions: pd.DataFrame, +) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]: + """Risolve gli identificatori delle domande per un set di domande.""" + warnings: List[str] = [] + question_ids: List[str] = [] + new_added = 0 + existing_found = 0 + + for q_idx, q_data in enumerate(questions_in_set_data): + if isinstance(q_data, dict): + q_id = str(q_data.get("id", "")) + q_text = q_data.get("domanda", "") + q_answer = q_data.get("risposta_attesa", "") + q_category = q_data.get("categoria", "") + else: + q_id = str(q_data) + q_text = "" + q_answer = "" + q_category = "" + + if not q_id: + warnings.append(f"Domanda #{q_idx + 1} senza ID (saltata).") + continue + + if q_text and q_answer: + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + was_added = add_question_if_not_exists( + question_id=q_id, + domanda=q_text, + risposta_attesa=q_answer, + categoria=q_category, + ) + if was_added: + new_added += 1 + question_ids.append(q_id) + new_row = pd.DataFrame( + { + "id": [q_id], + "domanda": [q_text], + "risposta_attesa": [q_answer], + "categoria": [q_category], + } + ) + current_questions = pd.concat( + [current_questions, new_row], ignore_index=True + ) + else: + existing_found += 1 + question_ids.append(q_id) + continue + + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + warnings.append( + f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata." + ) + + return question_ids, current_questions, new_added, existing_found, warnings + + +def persist_sets( + sets_data: List[Dict[str, Any]], + current_questions: pd.DataFrame, + current_sets: pd.DataFrame, +) -> Dict[str, Any]: + """Crea set di domande dai dati analizzati.""" + sets_imported_count = 0 + new_questions_added_count = 0 + existing_questions_found_count = 0 + warnings: List[str] = [] + + for set_idx, set_data in enumerate(sets_data): + if not isinstance(set_data, dict): + warnings.append( + f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)." + ) + continue + + set_name = set_data.get("name") + questions_in_set_data = set_data.get("questions", []) + + if not set_name or not isinstance(set_name, str) or not set_name.strip(): + warnings.append( + f"Set #{set_idx + 1} con nome mancante o non valido (saltato)." + ) + continue + + if not isinstance(questions_in_set_data, list): + warnings.append( + f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." + ) + continue + + if set_name in current_sets.get("name", pd.Series([])).values: + warnings.append( + f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." + ) + continue + + question_ids, current_questions, added, existing, q_warnings = resolve_question_ids( + questions_in_set_data, current_questions + ) + warnings.extend(q_warnings) + + if question_ids or len(questions_in_set_data) == 0: + try: + QuestionSet.create(set_name, question_ids) + sets_imported_count += 1 + except Exception as e: # pragma: no cover - protective + warnings.append( + f"Errore durante la creazione del set '{set_name}': {e}" + ) + else: + warnings.append( + f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." 
+ ) + + new_questions_added_count += added + existing_questions_found_count += existing + + sets_df = refresh_question_sets() + + success = sets_imported_count > 0 + success_message = "" + if success: + parts = [] + if sets_imported_count > 0: + parts.append(f"{sets_imported_count} set importati") + if new_questions_added_count > 0: + parts.append(f"{new_questions_added_count} nuove domande aggiunte") + if existing_questions_found_count > 0: + parts.append( + f"{existing_questions_found_count} domande esistenti referenziate" + ) + success_message = ". ".join(parts) + "." + + return { + "sets_imported_count": sets_imported_count, + "new_questions_added_count": new_questions_added_count, + "existing_questions_found_count": existing_questions_found_count, + "questions_df": current_questions, + "sets_df": sets_df, + "warnings": warnings, + "success": success, + "success_message": success_message, + } def import_sets_from_file(uploaded_file) -> Dict[str, Any]: @@ -67,17 +274,11 @@ def import_sets_from_file(uploaded_file) -> Dict[str, Any]: "warnings": persist_result["warnings"], } ) - if not persist_result["success"]: - result["error_message"] = ( - "Nessun set o domanda valida trovata nel file per l'importazione." - ) except json.JSONDecodeError: - result["error_message"] = ( - "Errore di decodifica JSON. Assicurati che il file sia un JSON valido." - ) - except ValueError as e: - result["error_message"] = str(e) - except Exception as e: + result["error_message"] = "Il formato del file json non è valido" + except ValueError: + result["error_message"] = "Il formato del file json non è valido" + except Exception as e: # pragma: no cover - general protection result["error_message"] = f"Errore imprevisto durante l'importazione: {str(e)}" return result diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py index f45f166..d052d83 100644 --- a/controllers/startup_controller.py +++ b/controllers/startup_controller.py @@ -1,28 +1,22 @@ import os -from controllers.db_controller import initialize_database -from services.question_service import load_questions +from models.db_utils import init_db +from controllers.question_controller import load_questions from controllers.question_set_controller import load_sets from controllers.test_controller import load_results -from services.openai_service import DEFAULT_MODEL, DEFAULT_ENDPOINT - - -def get_default_api_settings() -> dict: - """Restituisce l'endpoint e il modello API predefiniti.""" - return {"model": DEFAULT_MODEL, "endpoint": DEFAULT_ENDPOINT} +from controllers.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT def get_initial_state() -> dict: """Inizializza il database e restituisce lo stato di default dell'applicazione.""" - initialize_database() - defaults = get_default_api_settings() + init_db() return { "questions": load_questions(), "question_sets": load_sets(), "results": load_results(), "api_key": os.environ.get("OPENAI_API_KEY", ""), - "endpoint": defaults["endpoint"], - "model": defaults["model"], + "endpoint": DEFAULT_ENDPOINT, + "model": DEFAULT_MODEL, "temperature": 0.0, "max_tokens": 1000, } diff --git a/controllers/test_controller.py b/controllers/test_controller.py index 86935af..0eaacb2 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -1,43 +1,55 @@ -import pandas as pd -from typing import Dict, Tuple, List +"""Funzioni per la gestione dei test e della valutazione tramite LLM.""" + +from __future__ import annotations + import json +import logging import uuid from datetime 
import datetime +from typing import Dict, List, Tuple + +import pandas as pd +from openai import APIConnectionError, APIStatusError, RateLimitError + +from models.question import Question from models.test_result import TestResult -from services.cache import ( +from . import openai_client +from utils.cache import ( get_results as _get_results, refresh_results as _refresh_results, ) -from controllers.openai_controller import ( - evaluate_answer, - generate_example_answer_with_llm, -) -from services.question_service import load_questions def load_results() -> pd.DataFrame: """Restituisce i risultati dei test utilizzando la cache.""" + return _get_results() def refresh_results() -> pd.DataFrame: """Svuota e ricarica la cache dei risultati dei test.""" + return _refresh_results() def add_result(set_id: str, results_data: Dict) -> str: + """Aggiunge un nuovo risultato di test e aggiorna la cache.""" + rid = TestResult.add(set_id, results_data) - _refresh_results() + refresh_results() return rid def save_results(df: pd.DataFrame) -> None: + """Salva il DataFrame dei risultati e aggiorna la cache.""" + TestResult.save_df(df) - _refresh_results() + refresh_results() def import_results_from_file(file) -> Tuple[bool, str]: """Importa risultati di test da un file JSON.""" + try: data = json.load(file) if isinstance(data, dict): @@ -52,22 +64,29 @@ def import_results_from_file(file) -> Tuple[bool, str]: if not isinstance(item, dict): continue - result_id = str(item.get('id', uuid.uuid4())) - if result_id in results_df['id'].astype(str).values: + result_id = str(item.get("id", uuid.uuid4())) + if result_id in results_df["id"].astype(str).values: continue - set_id = str(item.get('set_id', '')) - timestamp = str(item.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))) - results_content = item.get('results', {}) + set_id = str(item.get("set_id", "")) + timestamp = str( + item.get( + "timestamp", + datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + ) + ) + results_content = item.get("results", {}) new_row = { - 'id': result_id, - 'set_id': set_id, - 'timestamp': timestamp, - 'results': results_content if isinstance(results_content, dict) else {} + "id": result_id, + "set_id": set_id, + "timestamp": timestamp, + "results": results_content if isinstance(results_content, dict) else {}, } - results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True) + results_df = pd.concat( + [results_df, pd.DataFrame([new_row])], ignore_index=True + ) added_count += 1 if added_count > 0: @@ -77,12 +96,13 @@ def import_results_from_file(file) -> Tuple[bool, str]: message = "Nessun nuovo risultato importato." 
return True, message - except Exception as e: + except Exception as e: # noqa: BLE001 return False, f"Errore durante l'importazione dei risultati: {str(e)}" def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: """Calcola statistiche dai risultati grezzi delle domande.""" + if not questions_results: return { "avg_score": 0, @@ -100,10 +120,9 @@ def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: for qid, qdata in questions_results.items(): evaluation = qdata.get("evaluation", {}) score = evaluation.get("score", 0) - per_question_scores.append({ - "question": qdata.get("question", f"Domanda {qid}"), - "score": score, - }) + per_question_scores.append( + {"question": qdata.get("question", f"Domanda {qid}"), "score": score} + ) for metric in radar_sums.keys(): radar_sums[metric] += evaluation.get(metric, 0) @@ -112,7 +131,8 @@ def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: sum(item["score"] for item in per_question_scores) / count if count > 0 else 0 ) radar_metrics = { - metric: radar_sums[metric] / count if count > 0 else 0 for metric in radar_sums + metric: radar_sums[metric] / count if count > 0 else 0 + for metric in radar_sums } return { @@ -122,6 +142,229 @@ def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: } +def evaluate_answer( + question: str, + expected_answer: str, + actual_answer: str, + client_config: dict, + show_api_details: bool = False, +): + """Valuta una risposta utilizzando un LLM specificato tramite client_config.""" + + client = openai_client.get_openai_client( + api_key=client_config.get("api_key"), + base_url=client_config.get("endpoint"), + ) + if not client: + return { + "score": 0, + "explanation": "Errore: Client API per la valutazione non configurato.", + "similarity": 0, + "correctness": 0, + "completeness": 0, + } + + prompt = f""" + Sei un valutatore esperto che valuta la qualità delle risposte alle domande. + Domanda: {question} + Risposta Attesa: {expected_answer} + Risposta Effettiva: {actual_answer} + + Valuta la risposta effettiva rispetto alla risposta attesa in base a: + 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? + 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? + 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? + Calcola un punteggio complessivo (0-100) basato su queste metriche. + Fornisci una breve spiegazione della tua valutazione (max 100 parole). 
+ Formatta la tua risposta come un oggetto JSON con questi campi: + - score: il punteggio complessivo (numero) + - explanation: la tua spiegazione (stringa) + - similarity: punteggio di somiglianza (numero) + - correctness: punteggio di correttezza (numero) + - completeness: punteggio di completezza (numero) + Esempio di risposta JSON: + {{ + "score": 95, + "explanation": "La risposta è corretta e completa", + "similarity": 90, + "correctness": 100, + "completeness": 95 + }} + """ + + api_request_details = { + "model": client_config.get("model", openai_client.DEFAULT_MODEL), + "messages": [{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.0), + "max_tokens": client_config.get("max_tokens", 250), + "response_format": {"type": "json_object"}, + } + + api_details_for_log = {} + if show_api_details: + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + choices = getattr(response, "choices", None) + if not choices: + logging.error("Risposta API priva di 'choices' validi") + if show_api_details: + api_details_for_log["response_content"] = "" + return { + "score": 0, + "explanation": "Errore: risposta API non valida.", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + content = choices[0].message.content or "{}" + if show_api_details: + api_details_for_log["response_content"] = content + + try: + evaluation = json.loads(content) + required_keys = [ + "score", + "explanation", + "similarity", + "correctness", + "completeness", + ] + if not all(key in evaluation for key in required_keys): + logging.warning( + f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." + ) + for key in required_keys: + if key not in evaluation: + evaluation[key] = ( + 0 + if key != "explanation" + else "Valutazione incompleta o formato JSON non corretto." 
+ ) + + evaluation["api_details"] = api_details_for_log + return evaluation + except json.JSONDecodeError: + logging.error( + f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" + ) + return { + "score": 0, + "explanation": f"Errore di decodifica JSON: {content[:100]}...", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") + api_details_for_log["error"] = str(e) + return { + "score": 0, + "explanation": f"Errore API: {type(e).__name__}", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + except Exception as exc: # noqa: BLE001 + logging.error( + f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" + ) + api_details_for_log["error"] = str(exc) + return { + "score": 0, + "explanation": f"Errore imprevisto: {type(exc).__name__}", + "similarity": 0, + "correctness": 0, + "completeness": 0, + "api_details": api_details_for_log, + } + + +def generate_example_answer_with_llm( + question: str, client_config: dict, show_api_details: bool = False +): + """Genera una risposta di esempio per una domanda utilizzando un LLM.""" + + client = openai_client.get_openai_client( + api_key=client_config.get("api_key"), + base_url=client_config.get("endpoint"), + ) + if not client: + logging.error("Client API per la generazione risposte non configurato.") + return { + "answer": None, + "api_details": {"error": "Client API non configurato"} + if show_api_details + else None, + } + + if question is None or not isinstance(question, str) or question.strip() == "": + logging.error("La domanda fornita è vuota o non valida.") + return { + "answer": None, + "api_details": {"error": "Domanda vuota o non valida"} + if show_api_details + else None, + } + + prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" + + api_request_details = { + "model": client_config.get("model", openai_client.DEFAULT_MODEL), + "messages": [{"role": "user", "content": prompt}], + "temperature": client_config.get("temperature", 0.7), + "max_tokens": client_config.get("max_tokens", 500), + } + + api_details_for_log = {} + if show_api_details: + api_details_for_log["request"] = api_request_details.copy() + + try: + response = client.chat.completions.create(**api_request_details) + answer = ( + response.choices[0].message.content.strip() + if response.choices and response.choices[0].message.content + else None + ) + if show_api_details: + api_details_for_log["response_content"] = ( + response.choices[0].message.content + if response.choices + else "Nessun contenuto" + ) + return { + "answer": answer, + "api_details": api_details_for_log if show_api_details else None, + } + + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logging.error( + f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" + ) + if show_api_details: + api_details_for_log["error"] = str(e) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else None, + } + except Exception as exc: # noqa: BLE001 + logging.error( + f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" + ) + if show_api_details: + api_details_for_log["error"] = str(exc) + return { + "answer": None, + "api_details": api_details_for_log if show_api_details else 
None, + } + + def execute_llm_test( set_id: str, set_name: str, @@ -130,24 +373,25 @@ def execute_llm_test( eval_preset_config: Dict, show_api_details: bool = False, ) -> Dict: - """ - Esegue la generazione delle risposte e la valutazione tramite LLM per - un elenco di domande. Restituisce i dettagli dei risultati e aggiorna - la cache dei risultati salvati. - """ - questions_df = load_questions() + """Esegue la generazione e valutazione delle risposte tramite LLM.""" + + questions = [ + {"id": q.id, "question": q.domanda, "expected_answer": q.risposta_attesa} + for q in Question.load_all() + ] + questions_df = pd.DataFrame(questions) def get_question_data(qid: str): - row = questions_df[questions_df['id'] == str(qid)] + row = questions_df[questions_df["id"] == str(qid)] if row.empty: return None - question = row.iloc[0].get('domanda', row.iloc[0].get('question', '')) - expected = row.iloc[0].get('risposta_attesa', row.iloc[0].get('expected_answer', '')) + question = row.iloc[0].get("question", "") + expected = row.iloc[0].get("expected_answer", "") if not question or not isinstance(question, str) or question.strip() == "": return None if not expected or not isinstance(expected, str) or expected.strip() == "": expected = "Risposta non disponibile" - return {'question': question, 'expected_answer': expected} + return {"question": question, "expected_answer": expected} results: Dict = {} for q_id in question_ids: @@ -155,59 +399,72 @@ def get_question_data(qid: str): if not q_data: continue generation_output = generate_example_answer_with_llm( - q_data['question'], + q_data["question"], client_config=gen_preset_config, show_api_details=show_api_details, ) - actual_answer = generation_output.get('answer') - generation_api_details = generation_output.get('api_details') + actual_answer = generation_output.get("answer") + generation_api_details = generation_output.get("api_details") if actual_answer is None: results[q_id] = { - 'question': q_data['question'], - 'expected_answer': q_data['expected_answer'], - 'actual_answer': 'Errore Generazione', - 'evaluation': {'score': 0, 'explanation': 'Generazione fallita'}, - 'generation_api_details': generation_api_details, + "question": q_data["question"], + "expected_answer": q_data["expected_answer"], + "actual_answer": "Errore Generazione", + "evaluation": {"score": 0, "explanation": "Generazione fallita"}, + "generation_api_details": generation_api_details, } continue evaluation = evaluate_answer( - q_data['question'], - q_data['expected_answer'], + q_data["question"], + q_data["expected_answer"], actual_answer, client_config=eval_preset_config, show_api_details=show_api_details, ) results[q_id] = { - 'question': q_data['question'], - 'expected_answer': q_data['expected_answer'], - 'actual_answer': actual_answer, - 'evaluation': evaluation, - 'generation_api_details': generation_api_details, + "question": q_data["question"], + "expected_answer": q_data["expected_answer"], + "actual_answer": actual_answer, + "evaluation": evaluation, + "generation_api_details": generation_api_details, } if not results: return {} - avg_score = sum(r['evaluation']['score'] for r in results.values()) / len(results) + avg_score = sum(r["evaluation"]["score"] for r in results.values()) / len(results) result_data = { - 'set_name': set_name, - 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - 'avg_score': avg_score, - 'sample_type': 'Generata da LLM', - 'method': 'LLM', - 'generation_llm': gen_preset_config.get('model'), - 'evaluation_llm': 
eval_preset_config.get('model'), - 'questions': results, + "set_name": set_name, + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "avg_score": avg_score, + "sample_type": "Generata da LLM", + "method": "LLM", + "generation_llm": gen_preset_config.get("model"), + "evaluation_llm": eval_preset_config.get("model"), + "questions": results, } result_id = add_result(set_id, result_data) results_df = load_results() return { - 'result_id': result_id, - 'avg_score': avg_score, - 'results': results, - 'results_df': results_df, + "result_id": result_id, + "avg_score": avg_score, + "results": results, + "results_df": results_df, } + + +__all__ = [ + "load_results", + "refresh_results", + "add_result", + "save_results", + "import_results_from_file", + "calculate_statistics", + "evaluate_answer", + "generate_example_answer_with_llm", + "execute_llm_test", +] diff --git a/models/api_preset.py b/models/api_preset.py index 164fd1b..a9d8734 100644 --- a/models/api_preset.py +++ b/models/api_preset.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import List import pandas as pd from sqlalchemy import select @@ -18,33 +19,22 @@ class APIPreset: max_tokens: int @staticmethod - def load_all() -> pd.DataFrame: + def load_all() -> List["APIPreset"]: with get_session() as session: presets = session.execute(select(APIPresetORM)).scalars().all() - data = [ - { - "id": p.id, - "name": p.name, - "provider_name": p.provider_name, - "endpoint": p.endpoint, - "api_key": p.api_key, - "model": p.model, - "temperature": p.temperature, - "max_tokens": p.max_tokens, - } + return [ + APIPreset( + id=p.id, + name=p.name, + provider_name=p.provider_name, + endpoint=p.endpoint, + api_key=p.api_key, + model=p.model, + temperature=p.temperature, + max_tokens=p.max_tokens, + ) for p in presets ] - columns = [ - "id", - "name", - "provider_name", - "endpoint", - "api_key", - "model", - "temperature", - "max_tokens", - ] - return pd.DataFrame(data, columns=columns) @staticmethod def save_df(df: pd.DataFrame) -> None: diff --git a/models/question.py b/models/question.py index 047dfba..a0fc0d4 100644 --- a/models/question.py +++ b/models/question.py @@ -1,7 +1,6 @@ from dataclasses import dataclass -from typing import Optional +from typing import List, Optional import uuid -import pandas as pd from sqlalchemy import select, delete from models.db_utils import get_session @@ -16,20 +15,18 @@ class Question: categoria: str = "" @staticmethod - def load_all() -> pd.DataFrame: + def load_all() -> List["Question"]: with get_session() as session: results = session.execute(select(QuestionORM)).scalars().all() - data = [ - { - "id": q.id, - "domanda": q.domanda or "", - "risposta_attesa": q.risposta_attesa or "", - "categoria": q.categoria or "", - } + return [ + Question( + id=q.id, + domanda=q.domanda or "", + risposta_attesa=q.risposta_attesa or "", + categoria=q.categoria or "", + ) for q in results ] - columns = ["id", "domanda", "risposta_attesa", "categoria"] - return pd.DataFrame(data, columns=columns) @staticmethod def add(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: diff --git a/models/question_set.py b/models/question_set.py index dfbdc9d..0c30c15 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from typing import List, Optional import uuid -import pandas as pd from sqlalchemy import select from models.db_utils import get_session @@ -15,18 +14,17 @@ class QuestionSet: questions: 
List[str] = field(default_factory=list) @staticmethod - def load_all() -> pd.DataFrame: + def load_all() -> List["QuestionSet"]: with get_session() as session: sets = session.execute(select(QuestionSetORM)).scalars().all() - data = [] - for s in sets: - data.append({ - "id": s.id, - "name": s.name or "", - "questions": [q.id for q in s.questions], - }) - columns = ["id", "name", "questions"] - return pd.DataFrame(data, columns=columns) + return [ + QuestionSet( + id=s.id, + name=s.name or "", + questions=[q.id for q in s.questions], + ) + for s in sets + ] @staticmethod def create(name: str, question_ids: Optional[List[str]] = None) -> str: diff --git a/models/test_result.py b/models/test_result.py index 8e87013..87ea6da 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict +from typing import Dict, List import uuid import json import pandas as pd @@ -17,19 +17,18 @@ class TestResult: results: Dict @staticmethod - def load_all() -> pd.DataFrame: + def load_all() -> List["TestResult"]: with get_session() as session: results = session.execute(select(TestResultORM)).scalars().all() - data = [] - for r in results: - data.append({ - "id": r.id, - "set_id": r.set_id, - "timestamp": r.timestamp, - "results": r.results or {}, - }) - columns = ["id", "set_id", "timestamp", "results"] - return pd.DataFrame(data, columns=columns) + return [ + TestResult( + id=r.id, + set_id=r.set_id, + timestamp=r.timestamp, + results=r.results or {}, + ) + for r in results + ] @staticmethod def save_df(df: pd.DataFrame) -> None: diff --git a/services/__init__.py b/services/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/services/cache.py b/services/cache.py deleted file mode 100644 index 3335474..0000000 --- a/services/cache.py +++ /dev/null @@ -1,47 +0,0 @@ -from functools import lru_cache -import pandas as pd - -from models.question import Question -from models.question_set import QuestionSet -from models.api_preset import APIPreset -from models.test_result import TestResult - - -@lru_cache(maxsize=1) -def get_questions() -> pd.DataFrame: - return Question.load_all() - - -def refresh_questions() -> pd.DataFrame: - get_questions.cache_clear() - return get_questions() - - -@lru_cache(maxsize=1) -def get_question_sets() -> pd.DataFrame: - return QuestionSet.load_all() - - -def refresh_question_sets() -> pd.DataFrame: - get_question_sets.cache_clear() - return get_question_sets() - - -@lru_cache(maxsize=1) -def get_api_presets() -> pd.DataFrame: - return APIPreset.load_all() - - -def refresh_api_presets() -> pd.DataFrame: - get_api_presets.cache_clear() - return get_api_presets() - - -@lru_cache(maxsize=1) -def get_results() -> pd.DataFrame: - return TestResult.load_all() - - -def refresh_results() -> pd.DataFrame: - get_results.cache_clear() - return get_results() diff --git a/services/evaluation_service.py b/services/evaluation_service.py deleted file mode 100644 index 3bb9e40..0000000 --- a/services/evaluation_service.py +++ /dev/null @@ -1,147 +0,0 @@ -import json -import logging -from openai import APIConnectionError, RateLimitError, APIStatusError - -from services import openai_service - -__all__ = ["evaluate_answer"] - - -def evaluate_answer( - question: str, - expected_answer: str, - actual_answer: str, - client_config: dict, - show_api_details: bool = False, -): - """Valuta una risposta utilizzando un LLM specificato tramite client_config.""" - client = openai_service.get_openai_client( - 
api_key=client_config.get("api_key"), - base_url=client_config.get("endpoint"), - ) - if not client: - return { - "score": 0, - "explanation": "Errore: Client API per la valutazione non configurato.", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } - - prompt = f""" - Sei un valutatore esperto che valuta la qualità delle risposte alle domande. - Domanda: {question} - Risposta Attesa: {expected_answer} - Risposta Effettiva: {actual_answer} - - Valuta la risposta effettiva rispetto alla risposta attesa in base a: - 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? - 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? - 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? - Calcola un punteggio complessivo (0-100) basato su queste metriche. - Fornisci una breve spiegazione della tua valutazione (max 100 parole). - Formatta la tua risposta come un oggetto JSON con questi campi: - - score: il punteggio complessivo (numero) - - explanation: la tua spiegazione (stringa) - - similarity: punteggio di somiglianza (numero) - - correctness: punteggio di correttezza (numero) - - completeness: punteggio di completezza (numero) - Esempio di risposta JSON: - {{ - "score": 95, - "explanation": "La risposta è corretta e completa", - "similarity": 90, - "correctness": 100, - "completeness": 95 - }} - """ - - api_request_details = { - "model": client_config.get("model", openai_service.DEFAULT_MODEL), - "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.0), - "max_tokens": client_config.get("max_tokens", 250), - "response_format": {"type": "json_object"}, - } - - api_details_for_log = {} - if show_api_details: - api_details_for_log["request"] = api_request_details.copy() - - try: - response = client.chat.completions.create(**api_request_details) - choices = getattr(response, "choices", None) - if not choices: - logging.error("Risposta API priva di 'choices' validi") - if show_api_details: - api_details_for_log["response_content"] = "" - return { - "score": 0, - "explanation": "Errore: risposta API non valida.", - "similarity": 0, - "correctness": 0, - "completeness": 0, - "api_details": api_details_for_log, - } - content = choices[0].message.content or "{}" - if show_api_details: - api_details_for_log["response_content"] = content - - try: - evaluation = json.loads(content) - required_keys = [ - "score", - "explanation", - "similarity", - "correctness", - "completeness", - ] - if not all(key in evaluation for key in required_keys): - logging.warning( - f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." - ) - for key in required_keys: - if key not in evaluation: - evaluation[key] = ( - 0 if key != "explanation" else "Valutazione incompleta o formato JSON non corretto." 
- ) - - evaluation["api_details"] = api_details_for_log - return evaluation - except json.JSONDecodeError: - logging.error( - f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" - ) - return { - "score": 0, - "explanation": f"Errore di decodifica JSON: {content[:100]}...", - "similarity": 0, - "correctness": 0, - "completeness": 0, - "api_details": api_details_for_log, - } - - except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") - api_details_for_log["error"] = str(e) - return { - "score": 0, - "explanation": f"Errore API: {type(e).__name__}", - "similarity": 0, - "correctness": 0, - "completeness": 0, - "api_details": api_details_for_log, - } - except Exception as exc: - logging.error( - f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" - ) - api_details_for_log["error"] = str(exc) - return { - "score": 0, - "explanation": f"Errore imprevisto: {type(exc).__name__}", - "similarity": 0, - "correctness": 0, - "completeness": 0, - "api_details": api_details_for_log, - } diff --git a/services/openai_service.py b/services/openai_service.py deleted file mode 100644 index 824ddc2..0000000 --- a/services/openai_service.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Utilità di supporto per interagire con l'API di OpenAI.""" - -# mypy: ignore-errors - -import logging -from openai import OpenAI - -DEFAULT_MODEL = "gpt-4o" -DEFAULT_ENDPOINT = "https://api.openai.com/v1" - -# Modelli disponibili per diversi provider (esempio) -OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] -ANTHROPIC_MODELS = [ - "claude-3-opus-20240229", - "claude-3-sonnet-20240229", - "claude-3-haiku-20240307", -] -# Aggiungi altri provider e modelli se necessario -# XAI_MODELS = ["grok-1"] - - -def get_openai_client(api_key: str, base_url: str = None): - """ - Crea e restituisce un client OpenAI configurato. - Parametri: - api_key: La chiave API. - base_url: L'URL base dell'endpoint API (opzionale, default a OpenAI). - Restituisce: - Un'istanza del client OpenAI o None se la chiave API non è fornita. - """ - if not api_key: - logging.warning("Tentativo di creare client OpenAI senza chiave API.") - return None - try: - effective_base_url = ( - base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT - ) - return OpenAI(api_key=api_key, base_url=effective_base_url) - except Exception as exc: - logging.error(f"Errore durante la creazione del client OpenAI: {exc}") - return None - - -def get_available_models_for_endpoint( - provider_name: str, endpoint_url: str = None, api_key: str = None -): - """ - Restituisce una lista di modelli disponibili basata sul provider o tenta di elencarli dall'endpoint. - Parametri: - provider_name: Nome del provider (es. "OpenAI", "Anthropic", "Personalizzato"). - endpoint_url: URL dell'endpoint (rilevante per "Personalizzato"). - api_key: Chiave API per autenticarsi (necessaria per elencare modelli da endpoint personalizzati). - Restituisce: - Una lista di stringhe con i nomi dei modelli. 
- """ - if provider_name == "OpenAI": - return OPENAI_MODELS - elif provider_name == "Anthropic": - return ANTHROPIC_MODELS - # Aggiungi altri provider predefiniti qui - # elif provider_name == "XAI": - # return XAI_MODELS - elif provider_name == "Personalizzato": - if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): - return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] - - client = get_openai_client(api_key=api_key, base_url=endpoint_url) - if not client: - return ["(Errore creazione client API)", DEFAULT_MODEL] - try: - models = client.models.list() - filtered_models = sorted( - [ - model.id - for model in models - if not any(term in model.id.lower() for term in ["embed", "embedding"]) - and ( - any( - term in model.id.lower() - for term in ["chat", "instruct", "gpt", "claude", "grok"] - ) - or len(model.id.split("-")) > 2 - ) - ] - ) - if not filtered_models: - filtered_models = sorted( - [model.id for model in models if not any(term in model.id.lower() for term in ["embed", "embedding"])] - ) - return filtered_models if filtered_models else [DEFAULT_MODEL] - except Exception: - return ["(Errore recupero modelli)", DEFAULT_MODEL] - return [DEFAULT_MODEL] diff --git a/services/question_service.py b/services/question_service.py deleted file mode 100644 index 1d61caa..0000000 --- a/services/question_service.py +++ /dev/null @@ -1,50 +0,0 @@ -"""Funzioni di utilità per gestire le domande e la relativa cache.""" -import pandas as pd -from models.question import Question -from services.cache import ( - get_questions as _get_questions, - refresh_questions as _refresh_questions, -) - - -def load_questions() -> pd.DataFrame: - """Restituisce tutte le domande utilizzando la cache.""" - return _get_questions() - - -def refresh_questions() -> pd.DataFrame: - """Svuota e ricarica la cache delle domande.""" - return _refresh_questions() - - -def add_question_if_not_exists( - question_id: str, - domanda: str, - risposta_attesa: str, - categoria: str = "", -) -> bool: - """Aggiunge una domanda solo se non esiste già. - - Parametri - ---------- - question_id: str - Identificatore della domanda da aggiungere. - domanda: str - Testo della domanda. - risposta_attesa: str - Risposta attesa. - categoria: str, opzionale - Categoria della domanda. - - Restituisce - ------- - bool - ``True`` se la domanda è stata aggiunta, ``False`` se esisteva già. - """ - df = Question.load_all() - if str(question_id) in df["id"].astype(str).values: - return False - - Question.add(domanda, risposta_attesa, categoria, question_id) - refresh_questions() - return True diff --git a/services/question_set_importer.py b/services/question_set_importer.py deleted file mode 100644 index 54f8e0b..0000000 --- a/services/question_set_importer.py +++ /dev/null @@ -1,256 +0,0 @@ -import os -import json -from typing import Any, Dict, List, Tuple - -import pandas as pd - -from services.question_service import add_question_if_not_exists -from models.question_set import QuestionSet -from services.cache import refresh_question_sets - - -REQUIRED_CSV_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] - - -def parse_input(uploaded_file) -> List[Dict[str, Any]]: - """Analizza un file CSV o JSON in una lista di dizionari di set di domande. - - Ogni elemento della lista restituita è un dizionario con le chiavi ``name`` e - ``questions``. Per i file CSV le righe sono raggruppate per la colonna ``name``. 
- - Solleva - ------ - ValueError - Se il file non contiene le colonne richieste o contiene JSON non valido. - """ - file_extension = os.path.splitext(uploaded_file.name)[1].lower() - - if file_extension == ".csv": - df = pd.read_csv(uploaded_file) - missing = [c for c in REQUIRED_CSV_COLUMNS if c not in df.columns] - if missing: - raise ValueError( - "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_CSV_COLUMNS) - ) - - sets_dict: Dict[str, List[Dict[str, str]]] = {} - for _, row in df.iterrows(): - name = str(row["name"]).strip() - if not name: - continue - question = { - "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", - "domanda": str(row["domanda"]).strip() - if not pd.isna(row["domanda"]) - else "", - "risposta_attesa": str(row["risposta_attesa"]).strip() - if not pd.isna(row["risposta_attesa"]) - else "", - "categoria": str(row["categoria"]).strip() - if not pd.isna(row["categoria"]) - else "", - } - sets_dict.setdefault(name, []).append(question) - return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] - - # Analisi JSON - string_data = uploaded_file.getvalue().decode("utf-8") - data = json.loads(string_data) - if not isinstance(data, list): - raise ValueError( - "Formato JSON non valido. Il file deve contenere una lista (array) di set." - ) - return data - - -def resolve_question_ids( - questions_in_set_data: List[Any], - current_questions: pd.DataFrame, -) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]: - """Risolve gli identificatori delle domande per un set di domande. - - Parametri - ---------- - questions_in_set_data: - Una lista che descrive le domande in un set. Ogni elemento può essere - un dizionario con i dettagli oppure una stringa identificativa. - current_questions: - DataFrame contenente le domande attualmente note. - - Restituisce - ------- - question_ids: List[str] - La lista degli identificatori di domanda risolti. - current_questions: pd.DataFrame - DataFrame aggiornato che include eventuali nuove domande create. - new_added: int - Numero di domande create durante il processo. - existing_found: int - Numero di domande già esistenti trovate. - warnings: List[str] - Eventuali avvisi riscontrati durante la risoluzione. - """ - warnings: List[str] = [] - question_ids: List[str] = [] - new_added = 0 - existing_found = 0 - - for q_idx, q_data in enumerate(questions_in_set_data): - if isinstance(q_data, dict): - q_id = str(q_data.get("id", "")) - q_text = q_data.get("domanda", "") - q_answer = q_data.get("risposta_attesa", "") - q_category = q_data.get("categoria", "") - else: - q_id = str(q_data) - q_text = "" - q_answer = "" - q_category = "" - - if not q_id: - warnings.append( - f"Domanda #{q_idx + 1} senza ID (saltata)." 
- ) - continue - - if q_text and q_answer: - if q_id in current_questions["id"].astype(str).values: - existing_found += 1 - question_ids.append(q_id) - else: - was_added = add_question_if_not_exists( - question_id=q_id, - domanda=q_text, - risposta_attesa=q_answer, - categoria=q_category, - ) - if was_added: - new_added += 1 - question_ids.append(q_id) - new_row = pd.DataFrame( - { - "id": [q_id], - "domanda": [q_text], - "risposta_attesa": [q_answer], - "categoria": [q_category], - } - ) - current_questions = pd.concat([current_questions, new_row], ignore_index=True) - else: - existing_found += 1 - question_ids.append(q_id) - continue - - if q_id in current_questions["id"].astype(str).values: - existing_found += 1 - question_ids.append(q_id) - else: - warnings.append( - f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata." - ) - - return question_ids, current_questions, new_added, existing_found, warnings - - -def persist_sets( - sets_data: List[Dict[str, Any]], - current_questions: pd.DataFrame, - current_sets: pd.DataFrame, -) -> Dict[str, Any]: - """Crea set di domande dai dati analizzati. - - Parametri - ---------- - sets_data: - Dati elaborati che descrivono i set da creare. - current_questions: - DataFrame delle domande attualmente note. - current_sets: - DataFrame dei set di domande esistenti. - - Restituisce - ------- - Dict[str, Any] - Un dizionario contenente conteggi, avvisi, flag di successo e - i DataFrame aggiornati per domande e set. - """ - sets_imported_count = 0 - new_questions_added_count = 0 - existing_questions_found_count = 0 - warnings: List[str] = [] - - for set_idx, set_data in enumerate(sets_data): - if not isinstance(set_data, dict): - warnings.append( - f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)." - ) - continue - - set_name = set_data.get("name") - questions_in_set_data = set_data.get("questions", []) - - if not set_name or not isinstance(set_name, str) or not set_name.strip(): - warnings.append( - f"Set #{set_idx + 1} con nome mancante o non valido (saltato)." - ) - continue - - if not isinstance(questions_in_set_data, list): - warnings.append( - f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." - ) - continue - - if set_name in current_sets.get("name", pd.Series([])).values: - warnings.append( - f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." - ) - continue - - question_ids, current_questions, added, existing, q_warnings = resolve_question_ids( - questions_in_set_data, current_questions - ) - warnings.extend(q_warnings) - - if question_ids or len(questions_in_set_data) == 0: - try: - QuestionSet.create(set_name, question_ids) - sets_imported_count += 1 - except Exception as e: - warnings.append( - f"Errore durante la creazione del set '{set_name}': {e}" - ) - else: - warnings.append( - f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." - ) - - new_questions_added_count += added - existing_questions_found_count += existing - - sets_df = refresh_question_sets() - - success = sets_imported_count > 0 or new_questions_added_count > 0 - success_message = "" - if success: - parts = [] - if sets_imported_count > 0: - parts.append(f"{sets_imported_count} set importati") - if new_questions_added_count > 0: - parts.append(f"{new_questions_added_count} nuove domande aggiunte") - if existing_questions_found_count > 0: - parts.append( - f"{existing_questions_found_count} domande esistenti referenziate" - ) - success_message = ". 
".join(parts) + "." - - return { - "sets_imported_count": sets_imported_count, - "new_questions_added_count": new_questions_added_count, - "existing_questions_found_count": existing_questions_found_count, - "questions_df": current_questions, - "sets_df": sets_df, - "warnings": warnings, - "success": success, - "success_message": success_message, - } diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index 1717288..88016a3 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -2,6 +2,7 @@ import sys from unittest.mock import patch +import pandas as pd sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -9,26 +10,71 @@ @patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.update") -def test_update_question_success(mock_update, mock_refresh): - mock_update.return_value = True +@patch("controllers.question_controller.Question.add") +@patch("controllers.question_controller.load_questions") +def test_add_question_if_not_exists_existing( + mock_load_questions, mock_add, mock_refresh +): + mock_load_questions.return_value = pd.DataFrame({"id": ["123"]}) + + result = question_controller.add_question_if_not_exists( + question_id="123", + domanda="dom", + risposta_attesa="ans", + categoria="cat", + ) + + assert result is False + mock_add.assert_not_called() + mock_refresh.assert_not_called() - result = question_controller.update_question( - "qid", domanda="d", risposta_attesa="a", categoria="c" + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.add") +@patch("controllers.question_controller.load_questions") +def test_add_question_if_not_exists_new(mock_load_questions, mock_add, mock_refresh): + mock_load_questions.return_value = pd.DataFrame({"id": ["456"]}) + + result = question_controller.add_question_if_not_exists( + question_id="123", + domanda="dom", + risposta_attesa="ans", + categoria="cat", ) assert result is True - mock_update.assert_called_once_with("qid", "d", "a", "c") + mock_add.assert_called_once_with("dom", "ans", "cat", "123") + mock_refresh.assert_called_once() + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.add") +def test_add_question(mock_add, mock_refresh): + mock_add.return_value = "qid" + + result = question_controller.add_question("dom", "ans", "cat", "qid") + + assert result == "qid" + mock_add.assert_called_once_with("dom", "ans", "cat", "qid") mock_refresh.assert_called_once() @patch("controllers.question_controller.refresh_questions") @patch("controllers.question_controller.Question.update") -def test_update_question_failure(mock_update, mock_refresh): - mock_update.return_value = False +def test_update_question(mock_update, mock_refresh): + mock_update.return_value = True - result = question_controller.update_question("qid") + result = question_controller.update_question("qid", "dom", "ans", "cat") - assert result is False - mock_update.assert_called_once_with("qid", None, None, None) + assert result is True + mock_update.assert_called_once_with("qid", "dom", "ans", "cat") + mock_refresh.assert_called_once() + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.delete") +def test_delete_question(mock_delete, mock_refresh): + question_controller.delete_question("qid") + + mock_delete.assert_called_once_with("qid") mock_refresh.assert_called_once() diff 
--git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py index a7b5950..56db2e3 100644 --- a/tests/test_question_set_controller.py +++ b/tests/test_question_set_controller.py @@ -1,22 +1,37 @@ -from controllers import question_controller, question_set_controller +import os +import sys +from unittest.mock import patch +sys.path.append(os.path.dirname(os.path.dirname(__file__))) -def test_create_update_delete_set(): - qid1 = question_controller.add_question("Q1", "A1") - qid2 = question_controller.add_question("Q2", "A2") +from controllers import question_set_controller # noqa: E402 - set_id = question_set_controller.create_set("Set1", [qid1, qid2]) - sets = question_set_controller.load_sets() - row = sets[sets["id"] == set_id].iloc[0] - assert row["name"] == "Set1" - assert set(row["questions"]) == {qid1, qid2} - question_set_controller.update_set(set_id, name="Set2", question_ids=[qid2]) - sets2 = question_set_controller.load_sets() - row2 = sets2[sets2["id"] == set_id].iloc[0] - assert row2["name"] == "Set2" - assert row2["questions"] == [qid2] +@patch("controllers.question_set_controller.refresh_question_sets") +@patch("controllers.question_set_controller.QuestionSet.create") +def test_create_set_controller(mock_create, mock_refresh): + mock_create.return_value = "sid" - question_set_controller.delete_set(set_id) - sets3 = question_set_controller.load_sets() - assert set_id not in sets3["id"].values + result = question_set_controller.create_set("name", ["q1"]) + + assert result == "sid" + mock_create.assert_called_once_with("name", ["q1"]) + mock_refresh.assert_called_once() + + +@patch("controllers.question_set_controller.refresh_question_sets") +@patch("controllers.question_set_controller.QuestionSet.update") +def test_update_set_controller(mock_update, mock_refresh): + question_set_controller.update_set("sid", name="name", question_ids=["q1"]) + + mock_update.assert_called_once_with("sid", "name", ["q1"]) + mock_refresh.assert_called_once() + + +@patch("controllers.question_set_controller.refresh_question_sets") +@patch("controllers.question_set_controller.QuestionSet.delete") +def test_delete_set_controller(mock_delete, mock_refresh): + question_set_controller.delete_set("sid") + + mock_delete.assert_called_once_with("sid") + mock_refresh.assert_called_once() diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py index 3f28ad3..d8f8a1c 100644 --- a/tests/test_question_set_importer.py +++ b/tests/test_question_set_importer.py @@ -6,13 +6,14 @@ import sys sys.path.append(os.path.dirname(os.path.dirname(__file__))) -import pandas as pd -import pytest +import pandas as pd # noqa: E402 +import pytest # noqa: E402 -from services.question_set_importer import ( +from controllers.question_set_controller import ( # noqa: E402 parse_input, resolve_question_ids, persist_sets, + import_sets_from_file, ) @@ -32,7 +33,7 @@ def test_parse_input_json_not_list(): parse_input(file) -@patch("services.question_set_importer.add_question_if_not_exists") +@patch("controllers.question_set_controller.add_question_if_not_exists") def test_resolve_question_ids_adds_and_existing(mock_add): mock_add.return_value = True current_questions = pd.DataFrame( @@ -68,8 +69,8 @@ def test_resolve_question_ids_missing_id(): assert updated_df.empty -@patch("services.question_set_importer.refresh_question_sets") -@patch("services.question_set_importer.QuestionSet.create") +@patch("controllers.question_set_controller.refresh_question_sets") 
+@patch("controllers.question_set_controller.QuestionSet.create") def test_persist_sets_skips_duplicates(mock_create, mock_refresh): mock_refresh.return_value = pd.DataFrame( [{"id": "s1", "name": "Existing", "questions": []}] @@ -88,3 +89,41 @@ def test_persist_sets_skips_duplicates(mock_create, mock_refresh): assert result["sets_imported_count"] == 1 assert any("esiste già" in w for w in result["warnings"]) mock_create.assert_called_once_with("New", []) + + +def test_import_sets_from_file_none(): + result = import_sets_from_file(None) + assert not result["success"] + assert "Nessun file" in result["error_message"] + + +def test_import_sets_from_file_invalid_json(): + file = io.BytesIO(b"not json") + file.name = "bad.json" + result = import_sets_from_file(file) + assert result["error_message"] == "Il formato del file json non è valido" + assert not result["success"] + + +def test_import_sets_from_file_duplicates_no_error(): + data = [{"name": "Existing", "questions": []}] + file = io.BytesIO(json.dumps(data).encode("utf-8")) + file.name = "test.json" + with ( + patch("controllers.question_set_controller.load_questions") as mock_lq, + patch("controllers.question_set_controller.load_sets") as mock_ls, + patch("controllers.question_set_controller.persist_sets") as mock_ps, + ): + mock_lq.return_value = pd.DataFrame() + mock_ls.return_value = pd.DataFrame() + mock_ps.return_value = { + "success": False, + "success_message": "", + "questions_df": pd.DataFrame(), + "sets_df": pd.DataFrame(), + "warnings": ["dup"], + } + result = import_sets_from_file(file) + assert result["error_message"] == "" + assert result["warnings"] == ["dup"] + assert not result["success"] diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 6b1f9f3..6ec394b 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -3,7 +3,7 @@ import pytest sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -from controllers.test_controller import calculate_statistics # noqa: E402 +from controllers import calculate_statistics # noqa: E402 def test_calculate_statistics(): From a49df05190f502a126593526a544aa98a014912e Mon Sep 17 00:00:00 2001 From: oniichan Date: Wed, 6 Aug 2025 12:13:14 +0200 Subject: [PATCH 04/41] import bug fixed --- app.py | 6 +- controllers/__init__.py | 3 + controllers/api_preset_controller.py | 3 + controllers/db_controller.py | 6 - controllers/openai_controller.py | 147 -------- controllers/question_controller.py | 14 +- controllers/question_set_controller.py | 26 +- controllers/startup_controller.py | 11 + controllers/test_controller.py | 22 +- docker-compose.yml | 2 +- initialize_db.py | 16 +- logging_config.py | 9 - models/__init__.py | 2 + models/api_preset.py | 3 + models/cached_data.py | 3 + models/db_utils.py | 22 +- models/openai_service.py | 285 --------------- models/orm_models.py | 3 + models/question.py | 3 + models/question_set.py | 3 + models/test_result.py | 3 + tests/conftest.py | 17 - tests/test_evaluation_service.py | 88 ----- tests/test_openai_controller.py | 112 ------ tests/test_placeholder.py | 4 - tests/test_question_controller.py | 17 + tests/test_question_service.py | 40 --- tests/test_question_set_importer.py | 8 + view/__init__.py | 1 - view/api_configurazione.py | 284 --------------- view/component_utils.py | 176 ---------- view/esecuzione_test.py | 168 --------- view/gestione_domande.py | 295 ---------------- view/gestione_set.py | 396 --------------------- view/home.py | 96 ----- view/session_state.py | 30 -- 
view/set_helpers.py | 136 -------- view/state_models.py | 47 --- view/style_utils.py | 372 -------------------- view/ui_utils.py | 10 - view/visualizza_risultati.py | 465 ------------------------- 41 files changed, 135 insertions(+), 3219 deletions(-) delete mode 100644 controllers/db_controller.py delete mode 100644 controllers/openai_controller.py delete mode 100644 logging_config.py delete mode 100644 models/openai_service.py delete mode 100644 tests/conftest.py delete mode 100644 tests/test_evaluation_service.py delete mode 100644 tests/test_openai_controller.py delete mode 100644 tests/test_placeholder.py delete mode 100644 tests/test_question_service.py delete mode 100644 view/__init__.py delete mode 100644 view/api_configurazione.py delete mode 100644 view/component_utils.py delete mode 100644 view/esecuzione_test.py delete mode 100644 view/gestione_domande.py delete mode 100644 view/gestione_set.py delete mode 100644 view/home.py delete mode 100644 view/session_state.py delete mode 100644 view/set_helpers.py delete mode 100644 view/state_models.py delete mode 100644 view/style_utils.py delete mode 100644 view/ui_utils.py delete mode 100644 view/visualizza_risultati.py diff --git a/app.py b/app.py index 13bf145..9f6c543 100644 --- a/app.py +++ b/app.py @@ -12,10 +12,12 @@ ) from views.session_state import initialize_session_state from views.style_utils import add_global_styles -from logging_config import setup_logging +from controllers.startup_controller import setup_logging + +logger = logging.getLogger(__name__) setup_logging() -logging.info("Applicazione avviata") +logger.info("Applicazione avviata") # Imposta la configurazione della pagina st.set_page_config( diff --git a/controllers/__init__.py b/controllers/__init__.py index 76f64ef..930c7fe 100644 --- a/controllers/__init__.py +++ b/controllers/__init__.py @@ -1,6 +1,8 @@ """Expose controller utilities for external use.""" # API preset management +import logging + from .api_preset_controller import ( load_presets, refresh_api_presets, @@ -47,6 +49,7 @@ # Import helpers from .startup_controller import get_initial_state +logger = logging.getLogger(__name__) __all__ = [ diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py index b23b9a9..3475a7f 100644 --- a/controllers/api_preset_controller.py +++ b/controllers/api_preset_controller.py @@ -1,5 +1,7 @@ """Utility per la gestione dei preset API.""" +import logging + import uuid from typing import List, Optional, Tuple @@ -13,6 +15,7 @@ from openai import APIConnectionError, APIStatusError, RateLimitError from . 
import openai_client +logger = logging.getLogger(__name__) def load_presets() -> pd.DataFrame: diff --git a/controllers/db_controller.py b/controllers/db_controller.py deleted file mode 100644 index a644818..0000000 --- a/controllers/db_controller.py +++ /dev/null @@ -1,6 +0,0 @@ -from models.db_utils import init_db - - -def initialize_database(): - """Inizializza il database creando le tabelle necessarie.""" - init_db() diff --git a/controllers/openai_controller.py b/controllers/openai_controller.py deleted file mode 100644 index 578474d..0000000 --- a/controllers/openai_controller.py +++ /dev/null @@ -1,147 +0,0 @@ -import logging -from openai import APIConnectionError, RateLimitError, APIStatusError - -from services import evaluation_service, openai_service - -__all__ = [ - "evaluate_answer", - "generate_example_answer_with_llm", - "test_api_connection", -] - - -def evaluate_answer( - question: str, - expected_answer: str, - actual_answer: str, - client_config: dict, - show_api_details: bool = False, -): - """Delega la valutazione della risposta a services.evaluation_service.""" - return evaluation_service.evaluate_answer( - question, expected_answer, actual_answer, client_config, show_api_details - ) - - -def generate_example_answer_with_llm( - question: str, client_config: dict, show_api_details: bool = False -): - """Genera una risposta di esempio per una domanda utilizzando un LLM.""" - client = openai_service.get_openai_client( - api_key=client_config.get("api_key"), - base_url=client_config.get("endpoint"), - ) - if not client: - logging.error("Client API per la generazione risposte non configurato.") - return { - "answer": None, - "api_details": {"error": "Client API non configurato"} - if show_api_details - else None, - } - - if question is None or not isinstance(question, str) or question.strip() == "": - logging.error("La domanda fornita \u00e8 vuota o non valida.") - return { - "answer": None, - "api_details": {"error": "Domanda vuota o non valida"} - if show_api_details - else None, - } - - prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" - - api_request_details = { - "model": client_config.get("model", openai_service.DEFAULT_MODEL), - "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.7), - "max_tokens": client_config.get("max_tokens", 500), - } - - api_details_for_log = {} - if show_api_details: - api_details_for_log["request"] = api_request_details.copy() - - try: - response = client.chat.completions.create(**api_request_details) - answer = ( - response.choices[0].message.content.strip() - if response.choices and response.choices[0].message.content - else None - ) - if show_api_details: - api_details_for_log["response_content"] = ( - response.choices[0].message.content - if response.choices - else "Nessun contenuto" - ) - return { - "answer": answer, - "api_details": api_details_for_log if show_api_details else None, - } - - except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error( - f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" - ) - if show_api_details: - api_details_for_log["error"] = str(e) - return { - "answer": None, - "api_details": api_details_for_log if show_api_details else None, - } - except Exception as exc: - logging.error( - f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" - ) - if show_api_details: - api_details_for_log["error"] = str(exc) - return { - "answer": None, 
- "api_details": api_details_for_log if show_api_details else None, - } - - -def test_api_connection( - api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int -): - """Testa la connessione all'API LLM con i parametri forniti.""" - client = openai_service.get_openai_client(api_key=api_key, base_url=endpoint) - if not client: - return False, "Client API non inizializzato. Controlla chiave API e endpoint." - - try: - response = client.chat.completions.create( - model=model, - messages=[ - { - "role": "user", - "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'", - } - ], - temperature=temperature, - max_tokens=max_tokens, - ) - content = response.choices[0].message.content or "" - if "Connessione riuscita." in content: - return True, "Connessione API riuscita!" - else: - return ( - False, - "Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): " - f"{content[:200]}...", - ) - except APIConnectionError as e: - return False, f"Errore di connessione API: {e}" - except RateLimitError as e: - return False, f"Errore di Rate Limit API: {e}" - except APIStatusError as e: - return ( - False, - "Errore di stato API (es. modello '{model}' non valido per l'endpoint '{endpoint}', " - f"autenticazione fallita, quota superata): {e.status_code} - {e.message}", - ) - except Exception as exc: - return False, ( - f"Errore imprevisto durante il test della connessione: {type(exc).__name__} - {exc}" - ) diff --git a/controllers/question_controller.py b/controllers/question_controller.py index f2edfbd..4465da9 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,4 +1,6 @@ """Controller per la gestione delle domande senza layer di service.""" + +import logging from typing import Optional, Tuple, List import json @@ -13,6 +15,8 @@ refresh_questions as _refresh_questions, ) +logger = logging.getLogger(__name__) + def load_questions() -> pd.DataFrame: """Restituisce tutte le domande utilizzando la cache.""" @@ -105,9 +109,15 @@ def _import(file) -> Tuple[bool, str]: imported_df = None if file_extension == ".csv": - imported_df = pd.read_csv(file) + try: + imported_df = pd.read_csv(file) + except Exception: + return False, "Il formato del file csv non è valido" elif file_extension == ".json": - data = json.load(file) + try: + data = json.load(file) + except Exception: + return False, "Il formato del file json non è valido" if isinstance(data, list): imported_df = pd.DataFrame(data) elif ( diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 9a266fb..0eaa453 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -1,3 +1,4 @@ +import logging import os import json from typing import List, Optional, Any, Dict, Tuple @@ -11,6 +12,7 @@ refresh_question_sets as _refresh_question_sets, ) +logger = logging.getLogger(__name__) REQUIRED_CSV_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] @@ -53,7 +55,11 @@ def parse_input(uploaded_file) -> List[Dict[str, Any]]: file_extension = os.path.splitext(uploaded_file.name)[1].lower() if file_extension == ".csv": - df = pd.read_csv(uploaded_file) + try: + df = pd.read_csv(uploaded_file) + except Exception as e: # pragma: no cover - handled as generic csv error + raise ValueError("Il formato del file csv non è valido") from e + missing = [c for c in REQUIRED_CSV_COLUMNS if c not in df.columns] if missing: raise ValueError( @@ -80,12 +86,14 @@ def 
parse_input(uploaded_file) -> List[Dict[str, Any]]: sets_dict.setdefault(name, []).append(question) return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] - string_data = uploaded_file.getvalue().decode("utf-8") - data = json.loads(string_data) + try: + string_data = uploaded_file.getvalue().decode("utf-8") + data = json.loads(string_data) + except Exception as e: # pragma: no cover - handled as generic json error + raise ValueError("Il formato del file json non è valido") from e + if not isinstance(data, list): - raise ValueError( - "Formato JSON non valido. Il file deve contenere una lista (array) di set." - ) + raise ValueError("Il formato del file json non è valido") return data @@ -274,10 +282,8 @@ def import_sets_from_file(uploaded_file) -> Dict[str, Any]: "warnings": persist_result["warnings"], } ) - except json.JSONDecodeError: - result["error_message"] = "Il formato del file json non è valido" - except ValueError: - result["error_message"] = "Il formato del file json non è valido" + except ValueError as e: + result["error_message"] = str(e) except Exception as e: # pragma: no cover - general protection result["error_message"] = f"Errore imprevisto durante l'importazione: {str(e)}" diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py index d052d83..f6f0461 100644 --- a/controllers/startup_controller.py +++ b/controllers/startup_controller.py @@ -1,3 +1,4 @@ +import logging import os from models.db_utils import init_db @@ -6,6 +7,16 @@ from controllers.test_controller import load_results from controllers.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT +logger = logging.getLogger(__name__) + + +def setup_logging(level: int = logging.INFO) -> None: + """Configura il logger root con un formato di base.""" + logging.basicConfig( + level=level, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + def get_initial_state() -> dict: """Inizializza il database e restituisce lo stato di default dell'applicazione.""" diff --git a/controllers/test_controller.py b/controllers/test_controller.py index 0eaacb2..a3748e9 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -2,8 +2,8 @@ from __future__ import annotations -import json import logging +import json import uuid from datetime import datetime from typing import Dict, List, Tuple @@ -19,6 +19,8 @@ refresh_results as _refresh_results, ) +logger = logging.getLogger(__name__) + def load_results() -> pd.DataFrame: """Restituisce i risultati dei test utilizzando la cache.""" @@ -208,7 +210,7 @@ def evaluate_answer( response = client.chat.completions.create(**api_request_details) choices = getattr(response, "choices", None) if not choices: - logging.error("Risposta API priva di 'choices' validi") + logger.error("Risposta API priva di 'choices' validi") if show_api_details: api_details_for_log["response_content"] = "" return { @@ -233,7 +235,7 @@ def evaluate_answer( "completeness", ] if not all(key in evaluation for key in required_keys): - logging.warning( + logger.warning( f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." 
) for key in required_keys: @@ -247,7 +249,7 @@ def evaluate_answer( evaluation["api_details"] = api_details_for_log return evaluation except json.JSONDecodeError: - logging.error( + logger.error( f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" ) return { @@ -260,7 +262,7 @@ def evaluate_answer( } except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") + logger.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") api_details_for_log["error"] = str(e) return { "score": 0, @@ -271,7 +273,7 @@ def evaluate_answer( "api_details": api_details_for_log, } except Exception as exc: # noqa: BLE001 - logging.error( + logger.error( f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" ) api_details_for_log["error"] = str(exc) @@ -295,7 +297,7 @@ def generate_example_answer_with_llm( base_url=client_config.get("endpoint"), ) if not client: - logging.error("Client API per la generazione risposte non configurato.") + logger.error("Client API per la generazione risposte non configurato.") return { "answer": None, "api_details": {"error": "Client API non configurato"} @@ -304,7 +306,7 @@ def generate_example_answer_with_llm( } if question is None or not isinstance(question, str) or question.strip() == "": - logging.error("La domanda fornita è vuota o non valida.") + logger.error("La domanda fornita è vuota o non valida.") return { "answer": None, "api_details": {"error": "Domanda vuota o non valida"} @@ -344,7 +346,7 @@ def generate_example_answer_with_llm( } except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error( + logger.error( f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" ) if show_api_details: @@ -354,7 +356,7 @@ def generate_example_answer_with_llm( "api_details": api_details_for_log if show_api_details else None, } except Exception as exc: # noqa: BLE001 - logging.error( + logger.error( f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" ) if show_api_details: diff --git a/docker-compose.yml b/docker-compose.yml index 0e4d95d..f0920a1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: ports: - "3306:3306" volumes: - - db_data:/var/lib/mysql + - ./data/:/var/lib/mysql environment: MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' MYSQL_ROOT_HOST: '%' diff --git a/initialize_db.py b/initialize_db.py index f689b4d..cb58e53 100644 --- a/initialize_db.py +++ b/initialize_db.py @@ -1,21 +1,23 @@ import logging -from logging_config import setup_logging +from controllers.startup_controller import setup_logging + +logger = logging.getLogger(__name__) try: from models.db_utils import init_db except ModuleNotFoundError as exc: - logging.error( + logger.error( "Modulo mancante. 
Installa le dipendenze con 'pip install -r requirements.txt'" ) - logging.error(f"Errore specifico: {exc}") + logger.error(f"Errore specifico: {exc}") raise exc if __name__ == '__main__': setup_logging() - logging.info("Inizializzazione del database in corso...") + logger.info("Inizializzazione del database in corso...") try: init_db() - logging.info("Database inizializzato con successo!") + logger.info("Database inizializzato con successo!") except Exception as e: - logging.error(f"Errore durante l'inizializzazione del database: {e}") - logging.exception("Traceback dettagliato:") + logger.error(f"Errore durante l'inizializzazione del database: {e}") + logger.exception("Traceback dettagliato:") diff --git a/logging_config.py b/logging_config.py deleted file mode 100644 index 6915a26..0000000 --- a/logging_config.py +++ /dev/null @@ -1,9 +0,0 @@ -import logging - - -def setup_logging(level: int = logging.INFO) -> None: - """Configura il logger root con un formato di base.""" - logging.basicConfig( - level=level, - format="%(asctime)s - %(levelname)s - %(message)s", - ) diff --git a/models/__init__.py b/models/__init__.py index e69de29..ab60af6 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -0,0 +1,2 @@ +import logging +logger = logging.getLogger(__name__) diff --git a/models/api_preset.py b/models/api_preset.py index a9d8734..92fd24f 100644 --- a/models/api_preset.py +++ b/models/api_preset.py @@ -1,3 +1,5 @@ +import logging + from dataclasses import dataclass from typing import List import pandas as pd @@ -5,6 +7,7 @@ from models.db_utils import get_session from models.orm_models import APIPresetORM +logger = logging.getLogger(__name__) @dataclass diff --git a/models/cached_data.py b/models/cached_data.py index accd4c4..1f5430d 100644 --- a/models/cached_data.py +++ b/models/cached_data.py @@ -1,7 +1,10 @@ +import logging + from models.api_preset import APIPreset from models.question import Question from models.question_set import QuestionSet from models.test_result import TestResult +logger = logging.getLogger(__name__) def get_questions(): diff --git a/models/db_utils.py b/models/db_utils.py index 9eb424c..14c2524 100644 --- a/models/db_utils.py +++ b/models/db_utils.py @@ -1,17 +1,33 @@ +import logging + import configparser from pathlib import Path from sqlalchemy import create_engine, text from sqlalchemy.orm import declarative_base, sessionmaker +logger = logging.getLogger(__name__) def _ensure_database(cfg): - """Create the target database if it does not exist.""" + """Crea il database di destinazione se non esiste.""" root_url = ( f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}" ) engine = create_engine(root_url, isolation_level="AUTOCOMMIT") - with engine.begin() as conn: - conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) + try: + with engine.begin() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) + except Exception as exc: + logger.exception( + "Impossibile creare il database '%s' sull'host '%s' con l'utente '%s'", + cfg.get('database'), + cfg.get('host'), + cfg.get('user'), + ) + raise RuntimeError( + f"Impossibile creare il database '{cfg.get('database')}' sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. 
" + "Il server del database potrebbe essere irraggiungibile, le credenziali potrebbero non essere valide " + "oppure l'utente potrebbe non avere privilegi sufficienti.", + ) from exc Base = declarative_base() diff --git a/models/openai_service.py b/models/openai_service.py deleted file mode 100644 index b378645..0000000 --- a/models/openai_service.py +++ /dev/null @@ -1,285 +0,0 @@ -import os -import json -import logging -from openai import OpenAI, APIConnectionError, RateLimitError, APIStatusError - -DEFAULT_MODEL = "gpt-4o" -DEFAULT_ENDPOINT = "https://api.openai.com/v1" - -# Modelli disponibili per diversi provider (esempio) -OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] -ANTHROPIC_MODELS = ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"] -# Aggiungi altri provider e modelli se necessario -# XAI_MODELS = ["grok-1"] - -def get_openai_client(api_key: str, base_url: str = None): - """ - Crea e restituisce un client OpenAI configurato. - Args: - api_key: La chiave API. - base_url: L'URL base dell'endpoint API (opzionale, default a OpenAI). - Returns: - Un'istanza del client OpenAI o None se la chiave API non è fornita. - """ - if not api_key: - # Commentato per ridurre output UI - logging.warning("Tentativo di creare client OpenAI senza chiave API.") - return None - try: - # Se base_url è None, "custom", o vuoto, usa il default di OpenAI. - # Altrimenti, usa il base_url fornito. - effective_base_url = base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT - return OpenAI(api_key=api_key, base_url=effective_base_url) - except Exception as e: - logging.error(f"Errore durante la creazione del client OpenAI: {e}") - return None - -def evaluate_answer(question: str, expected_answer: str, actual_answer: str, - client_config: dict, show_api_details: bool = False): - """ - Valuta una risposta utilizzando un LLM specificato tramite client_config. - Args: - question: La domanda. - expected_answer: La risposta attesa. - actual_answer: La risposta effettiva da valutare. - client_config: Dizionario contenente {api_key, endpoint, model, temperature, max_tokens}. - show_api_details: Se True, include i dettagli della richiesta/risposta API. - Returns: - Un dizionario con il punteggio e la spiegazione, o un risultato di errore. - """ - client = get_openai_client(api_key=client_config.get("api_key"), base_url=client_config.get("endpoint")) - if not client: - return {"score": 0, "explanation": "Errore: Client API per la valutazione non configurato.", "similarity": 0, "correctness": 0, "completeness": 0} - - prompt = f""" - Sei un valutatore esperto che valuta la qualità delle risposte alle domande. - Domanda: {question} - Risposta Attesa: {expected_answer} - Risposta Effettiva: {actual_answer} - - Valuta la risposta effettiva rispetto alla risposta attesa in base a: - 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? - 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? - 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? - Calcola un punteggio complessivo (0-100) basato su queste metriche. - Fornisci una breve spiegazione della tua valutazione (max 100 parole). 
- Formatta la tua risposta come un oggetto JSON con questi campi: - - score: il punteggio complessivo (numero) - - explanation: la tua spiegazione (stringa) - - similarity: punteggio di somiglianza (numero) - - correctness: punteggio di correttezza (numero) - - completeness: punteggio di completezza (numero) - Esempio di risposta JSON: - {{ - "score": 95, - "explanation": "La risposta è corretta e completa", - "similarity": 90, - "correctness": 100, - "completeness": 95 - }} - """ - - api_request_details = { - "model": client_config.get("model", DEFAULT_MODEL), - "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.0), - "max_tokens": client_config.get("max_tokens", 250), # Aumentato leggermente per JSON più complesso - "response_format": {"type": "json_object"} - } - - api_details_for_log = {} - if show_api_details: - # Copia i dettagli della richiesta per loggarli, escludendo dati sensibili se necessario - # (in questo caso, la chiave API è gestita dal client e non è direttamente nei dettagli della richiesta) - api_details_for_log["request"] = api_request_details.copy() - - try: - response = client.chat.completions.create(**api_request_details) - content = response.choices[0].message.content or "{}" - if show_api_details: - api_details_for_log["response_content"] = content - - try: - evaluation = json.loads(content) - required_keys = ['score', 'explanation', 'similarity', 'correctness', 'completeness'] - if not all(key in evaluation for key in required_keys): - logging.warning( - f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." - ) - for key in required_keys: - if key not in evaluation: - evaluation[key] = 0 if key != 'explanation' else "Valutazione incompleta o formato JSON non corretto." - - evaluation['api_details'] = api_details_for_log - return evaluation - except json.JSONDecodeError: - logging.error( - f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" - ) - return { - "score": 0, "explanation": f"Errore di decodifica JSON: {content[:100]}...", - "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log - } - - except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error( - f"Errore API durante la valutazione: {type(e).__name__} - {e}" - ) - api_details_for_log["error"] = str(e) - return { - "score": 0, "explanation": f"Errore API: {type(e).__name__}", - "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log - } - except Exception as e: - logging.error( - f"Errore imprevisto durante la valutazione: {type(e).__name__} - {e}" - ) - api_details_for_log["error"] = str(e) - return { - "score": 0, "explanation": f"Errore imprevisto: {type(e).__name__}", - "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log - } - -def generate_example_answer_with_llm(question: str, client_config: dict, show_api_details: bool = False): - """ - Genera una risposta di esempio per una domanda utilizzando un LLM. - Args: - question: La domanda per cui generare una risposta. - client_config: Dizionario contenente {api_key, endpoint, model, temperature, max_tokens}. - show_api_details: Se True, include i dettagli della chiamata API nel risultato. - Returns: - Un dizionario con { "answer": "risposta generata" | None, "api_details": {...} | None }. 
- """ - client = get_openai_client(api_key=client_config.get("api_key"), base_url=client_config.get("endpoint")) - if not client: - logging.error("Client API per la generazione risposte non configurato.") - return { - "answer": None, - "api_details": {"error": "Client API non configurato"} - if show_api_details - else None, - } - - # Controllo se la domanda è None o una stringa vuota - if question is None or not isinstance(question, str) or question.strip() == "": - logging.error("La domanda fornita è vuota o non valida.") - return { - "answer": None, - "api_details": {"error": "Domanda vuota o non valida"} - if show_api_details - else None, - } - - prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" - - api_request_details = { - "model": client_config.get("model", DEFAULT_MODEL), - "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.7), - "max_tokens": client_config.get("max_tokens", 500) - } - - api_details_for_log = {} - if show_api_details: - api_details_for_log["request"] = api_request_details.copy() - - try: - response = client.chat.completions.create(**api_request_details) - answer = response.choices[0].message.content.strip() if response.choices and response.choices[0].message.content else None - if show_api_details: - api_details_for_log["response_content"] = response.choices[0].message.content if response.choices else "Nessun contenuto" - return {"answer": answer, "api_details": api_details_for_log if show_api_details else None} - - except (APIConnectionError, RateLimitError, APIStatusError) as e: - logging.error( - f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" - ) - if show_api_details: - api_details_for_log["error"] = str(e) - return { - "answer": None, - "api_details": api_details_for_log if show_api_details else None, - } - except Exception as e: - logging.error( - f"Errore imprevisto durante la generazione della risposta: {type(e).__name__} - {e}" - ) - if show_api_details: - api_details_for_log["error"] = str(e) - return { - "answer": None, - "api_details": api_details_for_log if show_api_details else None, - } - -def test_api_connection(api_key: str, endpoint: str, model: str, temperature: float, max_tokens: int): - """ - Testa la connessione all'API LLM con i parametri forniti. - """ - client = get_openai_client(api_key=api_key, base_url=endpoint) - if not client: - return False, "Client API non inizializzato. Controlla chiave API e endpoint." - - try: - response = client.chat.completions.create( - model=model, - messages=[{"role": "user", "content": "Test connessione. Rispondi solo con: 'Connessione riuscita.'"}], - temperature=temperature, - max_tokens=max_tokens # Assicurati che sia sufficiente per la risposta attesa - ) - content = response.choices[0].message.content or "" - if "Connessione riuscita." in content: - return True, "Connessione API riuscita!" - else: - return False, f"Risposta inattesa dall'API (potrebbe indicare un problema con il modello o l'endpoint): {content[:200]}..." - except APIConnectionError as e: - return False, f"Errore di connessione API: {e}" - except RateLimitError as e: - return False, f"Errore di Rate Limit API: {e}" - except APIStatusError as e: - return False, f"Errore di stato API (es. 
modello '{model}' non valido per l'endpoint '{endpoint}', autenticazione fallita, quota superata): {e.status_code} - {e.message}" - except Exception as e: - return False, f"Errore imprevisto durante il test della connessione: {type(e).__name__} - {e}" - -def get_available_models_for_endpoint(provider_name: str, endpoint_url: str = None, api_key: str = None): - """ - Restituisce una lista di modelli disponibili basata sul provider o tenta di elencarli dall'endpoint. - Args: - provider_name: Nome del provider (es. "OpenAI", "Anthropic", "Personalizzato"). - endpoint_url: URL dell'endpoint (rilevante per "Personalizzato"). - api_key: Chiave API per autenticarsi (necessaria per elencare modelli da endpoint personalizzati). - Returns: - Una lista di stringhe di nomi di modelli. - """ - if provider_name == "OpenAI": - return OPENAI_MODELS - elif provider_name == "Anthropic": - return ANTHROPIC_MODELS - # Aggiungi altri provider predefiniti qui - # elif provider_name == "XAI": - # return XAI_MODELS - elif provider_name == "Personalizzato": - if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): - # Se non ci sono informazioni sufficienti, restituisce una lista di fallback - return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] - - client = get_openai_client(api_key=api_key, base_url=endpoint_url) - if not client: - return ["(Errore creazione client API)", DEFAULT_MODEL] - try: - models = client.models.list() - # Filtra per modelli che non sono di embedding - filtered_models = sorted([ - model.id for model in models - if not any(term in model.id.lower() for term in ["embed", "embedding"]) - and (any(term in model.id.lower() for term in ["chat", "instruct", "gpt", "claude", "grok"]) or len(model.id.split('-')) > 2) - ]) - if not filtered_models: - # Se il filtro aggressivo non trova nulla, restituisci tutti i modelli non di embedding - filtered_models = sorted([model.id for model in models if not any(term in model.id.lower() for term in ["embed", "embedding"])]) - return filtered_models if filtered_models else [DEFAULT_MODEL] - except Exception as e: - return ["(Errore recupero modelli)", DEFAULT_MODEL] - return [DEFAULT_MODEL] # Default generale se il provider non è riconosciuto diff --git a/models/orm_models.py b/models/orm_models.py index b3414e1..07b85df 100644 --- a/models/orm_models.py +++ b/models/orm_models.py @@ -2,10 +2,13 @@ # mypy: ignore-errors +import logging + from sqlalchemy import Column, String, Text, Float, Integer, ForeignKey, Table, JSON from sqlalchemy.orm import relationship from .db_utils import Base +logger = logging.getLogger(__name__) # Tabella di associazione per la relazione molti-a-molti tra set e domande question_set_questions = Table( diff --git a/models/question.py b/models/question.py index a0fc0d4..7c16e30 100644 --- a/models/question.py +++ b/models/question.py @@ -1,3 +1,5 @@ +import logging + from dataclasses import dataclass from typing import List, Optional import uuid @@ -5,6 +7,7 @@ from models.db_utils import get_session from models.orm_models import QuestionORM, question_set_questions +logger = logging.getLogger(__name__) @dataclass diff --git a/models/question_set.py b/models/question_set.py index 0c30c15..156152f 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -1,3 +1,5 @@ +import logging + from dataclasses import dataclass, field from typing import List, Optional import uuid @@ -5,6 +7,7 @@ from models.db_utils import get_session from models.orm_models 
import QuestionSetORM, QuestionORM +logger = logging.getLogger(__name__) @dataclass diff --git a/models/test_result.py b/models/test_result.py index 87ea6da..97c2d4c 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -1,3 +1,5 @@ +import logging + from dataclasses import dataclass from typing import Dict, List import uuid @@ -7,6 +9,7 @@ from models.db_utils import get_session from models.orm_models import TestResultORM +logger = logging.getLogger(__name__) @dataclass diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 718566d..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,17 +0,0 @@ -import os, sys -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -import pytest -from sqlalchemy import create_engine -from models import db_utils, question, question_set - -@pytest.fixture(autouse=True) -def in_memory_db(monkeypatch): - engine = create_engine("sqlite:///:memory:") - # Patch get_engine in db_utils and imported references - monkeypatch.setattr(db_utils, "_engine", engine) - monkeypatch.setattr(db_utils, "get_engine", lambda: engine) - monkeypatch.setattr(question, "get_engine", lambda: engine) - monkeypatch.setattr(question_set, "get_engine", lambda: engine) - db_utils.init_db() - yield engine - engine.dispose() diff --git a/tests/test_evaluation_service.py b/tests/test_evaluation_service.py deleted file mode 100644 index aab6f64..0000000 --- a/tests/test_evaluation_service.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -import logging -import os -import sys -from unittest.mock import Mock, patch - -sys.path.append(os.path.dirname(os.path.dirname(__file__))) - -from services import evaluation_service - - -def _mock_response(content: str): - mock_resp = Mock() - mock_choice = Mock() - mock_choice.message = Mock() - mock_choice.message.content = content - mock_resp.choices = [mock_choice] - return mock_resp - - -def _mock_response_no_choices(): - mock_resp = Mock() - mock_resp.choices = [] - return mock_resp - - -@patch("services.evaluation_service.openai_service.get_openai_client") -def test_evaluate_answer_success(mock_get_client): - mock_client = Mock() - mock_get_client.return_value = mock_client - - evaluation = { - "score": 90, - "explanation": "good", - "similarity": 90, - "correctness": 90, - "completeness": 90, - } - mock_client.chat.completions.create.return_value = _mock_response( - json.dumps(evaluation) - ) - - result = evaluation_service.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"}, show_api_details=True - ) - - assert result["score"] == 90 - assert result["similarity"] == 90 - assert "api_details" in result - - -@patch("services.evaluation_service.openai_service.get_openai_client", return_value=None) -def test_evaluate_answer_no_client(mock_get_client): - result = evaluation_service.evaluate_answer( - "q", "expected", "actual", {"api_key": None} - ) - - assert result["score"] == 0 - assert "Client API" in result["explanation"] - - -@patch("services.evaluation_service.openai_service.get_openai_client") -def test_evaluate_answer_json_decode_error(mock_get_client): - mock_client = Mock() - mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response("not json") - - result = evaluation_service.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"} - ) - - assert result["score"] == 0 - assert "Errore di decodifica JSON" in result["explanation"] - - -@patch("services.evaluation_service.openai_service.get_openai_client") -def 
test_evaluate_answer_no_choices(mock_get_client, caplog): - mock_client = Mock() - mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response_no_choices() - - with caplog.at_level(logging.ERROR): - result = evaluation_service.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"} - ) - - assert result["score"] == 0 - assert "choices" in caplog.text diff --git a/tests/test_openai_controller.py b/tests/test_openai_controller.py deleted file mode 100644 index 1b9fce6..0000000 --- a/tests/test_openai_controller.py +++ /dev/null @@ -1,112 +0,0 @@ -import os -import sys -from unittest.mock import Mock, patch - -sys.path.append(os.path.dirname(os.path.dirname(__file__))) - -from controllers import openai_controller # noqa: E402 - - -def _mock_response(content: str): - mock_resp = Mock() - mock_choice = Mock() - mock_choice.message = Mock() - mock_choice.message.content = content - mock_resp.choices = [mock_choice] - return mock_resp - - -@patch("controllers.openai_controller.evaluation_service.evaluate_answer") -def test_evaluate_answer_delegates(mock_service): - evaluation = { - "score": 90, - "explanation": "good", - "similarity": 90, - "correctness": 90, - "completeness": 90, - } - mock_service.return_value = evaluation - - result = openai_controller.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"}, show_api_details=True - ) - - assert result == evaluation - mock_service.assert_called_once_with( - "q", "expected", "actual", {"api_key": "key"}, True - ) - - -@patch("services.openai_service.get_openai_client") -def test_generate_example_answer_success(mock_get_client): - mock_client = Mock() - mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response(" answer ") - - result = openai_controller.generate_example_answer_with_llm( - "question", {"api_key": "key"} - ) - - assert result["answer"] == "answer" - - -@patch("services.openai_service.get_openai_client", return_value=None) -def test_generate_example_answer_no_client(mock_get_client): - result = openai_controller.generate_example_answer_with_llm( - "question", {"api_key": None}, show_api_details=True - ) - - assert result["answer"] is None - assert result["api_details"]["error"] == "Client API non configurato" - - -@patch("services.openai_service.get_openai_client") -def test_generate_example_answer_empty_question(mock_get_client): - mock_get_client.return_value = Mock() - - result = openai_controller.generate_example_answer_with_llm( - "", {"api_key": "key"}, show_api_details=True - ) - - assert result["answer"] is None - assert result["api_details"]["error"] == "Domanda vuota o non valida" - - -@patch("services.openai_service.get_openai_client") -def test_test_api_connection_success(mock_get_client): - mock_client = Mock() - mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response( - "Connessione riuscita." - ) - - ok, msg = openai_controller.test_api_connection( - "key", "endpoint", "model", 0.1, 10 - ) - - assert ok is True - assert msg == "Connessione API riuscita!" 
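The openai_controller and evaluation_service test modules removed in this patch exercised generate_example_answer_with_llm through the old controller; that function now lives in controllers/test_controller.py. Below is a minimal sketch of how one of the removed checks could be re-pointed at the relocated function. It assumes that importing controllers.test_controller does not itself require a reachable database, and that building the client with a placeholder key performs no network call before the empty-question guard returns; neither assumption is guaranteed by the hunks above.

import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(__file__)))  # same sys.path shim the removed tests used

from controllers.test_controller import generate_example_answer_with_llm  # noqa: E402


def test_generate_example_answer_empty_question_relocated():
    # The empty-question guard should return an error payload without calling the API;
    # a placeholder key suffices because the client is only constructed, never invoked.
    result = generate_example_answer_with_llm("", {"api_key": "key"}, show_api_details=True)
    assert result["answer"] is None
    assert result["api_details"]["error"] == "Domanda vuota o non valida"
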
- - -@patch("services.openai_service.get_openai_client") -def test_test_api_connection_unexpected_response(mock_get_client): - mock_client = Mock() - mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response("failure") - - ok, msg = openai_controller.test_api_connection( - "key", "endpoint", "model", 0.1, 10 - ) - - assert ok is False - assert "Risposta inattesa" in msg - - -@patch("services.openai_service.get_openai_client", return_value=None) -def test_test_api_connection_no_client(mock_get_client): - ok, msg = openai_controller.test_api_connection( - "key", "endpoint", "model", 0.1, 10 - ) - - assert ok is False - assert "Client API non inizializzato" in msg diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py deleted file mode 100644 index eac42c5..0000000 --- a/tests/test_placeholder.py +++ /dev/null @@ -1,4 +0,0 @@ -import pytest - -def test_placeholder(): - assert True diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index 88016a3..6fa7135 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -2,6 +2,7 @@ import sys from unittest.mock import patch +import io import pandas as pd sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -78,3 +79,19 @@ def test_delete_question(mock_delete, mock_refresh): mock_delete.assert_called_once_with("qid") mock_refresh.assert_called_once() + + +def test_import_questions_from_file_invalid_json(): + file = io.StringIO("not json") + file.name = "bad.json" + success, message = question_controller.import_questions_from_file(file) + assert not success + assert message == "Il formato del file json non è valido" + + +def test_import_questions_from_file_invalid_csv(): + file = io.StringIO("id,domanda\n\"1,Test") + file.name = "bad.csv" + success, message = question_controller.import_questions_from_file(file) + assert not success + assert message == "Il formato del file csv non è valido" diff --git a/tests/test_question_service.py b/tests/test_question_service.py deleted file mode 100644 index e95eb04..0000000 --- a/tests/test_question_service.py +++ /dev/null @@ -1,40 +0,0 @@ -import pandas as pd -from unittest.mock import patch - -from services import question_service - - -@patch("services.question_service.refresh_questions") -@patch("services.question_service.Question.add") -@patch("services.question_service.Question.load_all") -def test_add_question_if_not_exists_existing(mock_load_all, mock_add, mock_refresh): - mock_load_all.return_value = pd.DataFrame({"id": ["123"]}) - - result = question_service.add_question_if_not_exists( - question_id="123", - domanda="dom", - risposta_attesa="ans", - categoria="cat", - ) - - assert result is False - mock_add.assert_not_called() - mock_refresh.assert_not_called() - - -@patch("services.question_service.refresh_questions") -@patch("services.question_service.Question.add") -@patch("services.question_service.Question.load_all") -def test_add_question_if_not_exists_new(mock_load_all, mock_add, mock_refresh): - mock_load_all.return_value = pd.DataFrame({"id": ["456"]}) - - result = question_service.add_question_if_not_exists( - question_id="123", - domanda="dom", - risposta_attesa="ans", - categoria="cat", - ) - - assert result is True - mock_add.assert_called_once_with("dom", "ans", "cat", "123") - mock_refresh.assert_called_once() diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py index d8f8a1c..b70b24b 100644 --- 
a/tests/test_question_set_importer.py +++ b/tests/test_question_set_importer.py @@ -105,6 +105,14 @@ def test_import_sets_from_file_invalid_json(): assert not result["success"] +def test_import_sets_from_file_invalid_csv(): + file = io.BytesIO(b"id,domanda\n\"1,Test") + file.name = "bad.csv" + result = import_sets_from_file(file) + assert result["error_message"] == "Il formato del file csv non è valido" + assert not result["success"] + + def test_import_sets_from_file_duplicates_no_error(): data = [{"name": "Existing", "questions": []}] file = io.BytesIO(json.dumps(data).encode("utf-8")) diff --git a/view/__init__.py b/view/__init__.py deleted file mode 100644 index c5fcef4..0000000 --- a/view/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""View package.""" diff --git a/view/api_configurazione.py b/view/api_configurazione.py deleted file mode 100644 index 852a6a5..0000000 --- a/view/api_configurazione.py +++ /dev/null @@ -1,284 +0,0 @@ -import streamlit as st - -from controllers.openai_controller import test_api_connection -from view.style_utils import add_page_header, add_section_title -from controllers.api_preset_controller import ( - save_preset, - delete_preset, - load_presets, - list_presets, - get_preset_by_id, - validate_preset, - get_default_api_settings, -) - -DEFAULT_API_SETTINGS = get_default_api_settings() -DEFAULT_MODEL = DEFAULT_API_SETTINGS["model"] -DEFAULT_ENDPOINT = DEFAULT_API_SETTINGS["endpoint"] - - -# Funzioni di callback per i pulsanti del form -def start_new_preset_edit(): - st.session_state.editing_preset = True - st.session_state.current_preset_edit_id = None # Indica nuovo preset - st.session_state.preset_form_data = { - "name": "", - "endpoint": DEFAULT_ENDPOINT, - "api_key": "", - "model": DEFAULT_MODEL, - "temperature": 0.0, - "max_tokens": 1000 - } - - -def start_existing_preset_edit(preset_id): - preset_to_edit = get_preset_by_id(preset_id, st.session_state.api_presets) - if not preset_to_edit: - st.error("Preset non trovato.") - return - st.session_state.editing_preset = True - st.session_state.current_preset_edit_id = preset_id - st.session_state.preset_form_data = preset_to_edit.copy() - # Assicura che i campi numerici siano del tipo corretto per gli slider/number_input - st.session_state.preset_form_data["temperature"] = float( - st.session_state.preset_form_data.get("temperature", 0.0) - ) - st.session_state.preset_form_data["max_tokens"] = int( - st.session_state.preset_form_data.get("max_tokens", 1000) - ) - if "endpoint" not in st.session_state.preset_form_data: - st.session_state.preset_form_data["endpoint"] = DEFAULT_ENDPOINT - - -def cancel_preset_edit(): - st.session_state.editing_preset = False - st.session_state.current_preset_edit_id = None - st.session_state.preset_form_data = {} - - -def save_preset_from_form(): - """Salva un preset leggendo i valori direttamente dagli input della form.""" - # Recupera sempre i valori correnti dei widget dal session_state - preset_name = st.session_state.get("preset_name", "").strip() - endpoint = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) - api_key = st.session_state.get("preset_api_key", "") - model = st.session_state.get("preset_model", DEFAULT_MODEL) - temperature = float( - st.session_state.get( - "preset_temperature", - st.session_state.preset_form_data.get("temperature", 0.0), - ) - ) - max_tokens = int( - st.session_state.get( - "preset_max_tokens", - st.session_state.preset_form_data.get("max_tokens", 1000), - ) - ) - - # Aggiorna il dizionario del form in sessione con i valori raccolti - 
st.session_state.preset_form_data.update( - { - "name": preset_name, - "endpoint": endpoint, - "api_key": api_key, - "model": model, - "temperature": temperature, - "max_tokens": max_tokens, - } - ) - - form_data = st.session_state.preset_form_data.copy() - current_id = st.session_state.current_preset_edit_id - - is_valid, validation_message = validate_preset(form_data, current_id) - if not is_valid: - st.error(validation_message) - return - - success, message, updated_df = save_preset(form_data, current_id) - if success: - st.session_state.api_presets = updated_df - st.success(message) - cancel_preset_edit() # Chiudi il form - else: - st.error(message) - - -def delete_preset_callback(preset_id): - success, message, updated_df = delete_preset(preset_id) - if success: - st.session_state.api_presets = updated_df - st.success(message) - if st.session_state.current_preset_edit_id == preset_id: - cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form - else: - st.error(message) - - -def render(): - add_page_header( - "Gestione Preset API", - icon="⚙️", - description="Crea, visualizza, testa ed elimina i preset di configurazione API per LLM." - ) - - # Stato della sessione per la gestione del form di creazione/modifica preset - if "editing_preset" not in st.session_state: - st.session_state.editing_preset = False - if "current_preset_edit_id" not in st.session_state: - st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica - if "preset_form_data" not in st.session_state: - st.session_state.preset_form_data = {} - - # Carica i preset API utilizzando la cache - if 'api_presets' not in st.session_state: - st.session_state.api_presets = load_presets() - - # Sezione per visualizzare/modificare i preset - if st.session_state.editing_preset: - add_section_title("Modifica/Crea Preset API", icon="✏️") - form_data = st.session_state.preset_form_data - - with st.form(key="preset_form"): - # Usa un key specifico per il campo nome e aggiorna il form_data - form_data["name"] = st.text_input( - "Nome del Preset", - value=form_data.get("name", ""), - key="preset_name", # Key esplicita per il campo nome - help="Un nome univoco per questo preset." - ) - - # Campo chiave API con key esplicita - form_data["api_key"] = st.text_input( - "Chiave API", - value=form_data.get("api_key", ""), - type="password", - key="preset_api_key", # Key esplicita per la chiave API - help="La tua chiave API per il provider selezionato." 
- ) - - # Campo endpoint con key esplicita - form_data["endpoint"] = st.text_input( - "Provider Endpoint", - value=form_data.get("endpoint", DEFAULT_ENDPOINT), - placeholder="https://api.openai.com/v1", - key="preset_endpoint", # Key esplicita per l'endpoint - help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" - ) - - # Modello sempre personalizzabile - form_data["model"] = st.text_input( - "Modello", - value=form_data.get("model", DEFAULT_MODEL), - placeholder="gpt-4o", - key="preset_model", # Key esplicita per il modello - help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" - ) - - form_data["temperature"] = st.slider( - "Temperatura", - 0.0, - 2.0, - float(form_data.get("temperature", 0.0)), - 0.1, - key="preset_temperature", - ) - form_data["max_tokens"] = st.number_input( - "Max Tokens", - min_value=50, - max_value=8000, - value=int(form_data.get("max_tokens", 1000)), - step=50, - key="preset_max_tokens", - ) - - # Campo Test Connessione e pulsanti di salvataggio/annullamento - # Pulsante Test Connessione - if st.form_submit_button("⚡ Testa Connessione API"): - # Usa direttamente i valori dal session_state per il test - api_key_to_test = st.session_state.get("preset_api_key", "") - endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) - model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) - - with st.spinner("Test in corso..."): - success, message = test_api_connection( - api_key=api_key_to_test, - endpoint=endpoint_to_test, - model=model_to_test, - temperature=form_data.get("temperature", 0.0), - max_tokens=form_data.get("max_tokens", 1000) - ) - if success: - st.success(message) - else: - st.error(message) - - # Pulsanti Salva e Annulla - cols_form_buttons = st.columns(2) - with cols_form_buttons[0]: - if st.form_submit_button( - "💾 Salva Preset", - on_click=save_preset_from_form, - type="primary", - use_container_width=True, - ): - pass # Il callback gestisce il salvataggio - with cols_form_buttons[1]: - if st.form_submit_button( - "❌ Annulla", - on_click=cancel_preset_edit, - use_container_width=True, - ): - pass # Il callback gestisce il cambio di stato - else: - add_section_title("Preset API Salvati", icon="🗂️") - if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): - pass # Il callback gestisce il cambio di stato - - preset_list = list_presets(st.session_state.api_presets) - if not preset_list: - st.info( - "Nessun preset API salvato. Clicca su 'Crea Nuovo Preset' per iniziare." 
- ) - else: - for preset in preset_list: - with st.container(): - st.markdown(f"#### {preset['name']}") - cols_preset_details = st.columns([3, 1, 1]) - with cols_preset_details[0]: - st.caption(f"Modello: {preset.get('model', 'N/A')}") - st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") - with cols_preset_details[1]: - if st.button( - "✏️ Modifica", - key=f"edit_{preset['id']}", - on_click=start_existing_preset_edit, - args=(preset['id'],), - use_container_width=True, - ): - pass - with cols_preset_details[2]: - if st.button( - "🗑️ Elimina", - key=f"delete_{preset['id']}", - on_click=delete_preset_callback, - args=(preset['id'],), - type="secondary", - use_container_width=True, - ): - pass - st.divider() - - # Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) - if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui - st.success(st.session_state.preset_applied_message) - del st.session_state.preset_applied_message - - if "preset_saved_message" in st.session_state: - st.success(st.session_state.preset_saved_message) - del st.session_state.preset_saved_message - - if "preset_deleted_message" in st.session_state: - st.success(st.session_state.preset_deleted_message) - del st.session_state.preset_deleted_message diff --git a/view/component_utils.py b/view/component_utils.py deleted file mode 100644 index d9b5127..0000000 --- a/view/component_utils.py +++ /dev/null @@ -1,176 +0,0 @@ -import streamlit as st - - -def create_card(title: str, content: str, icon: str | None = None, - is_success: bool = False, is_warning: bool = False, is_error: bool = False): - """Crea una scheda stilizzata con un contenuto personalizzabile.""" - color = "#4F6AF0" - bg_color = "white" - shadow_color = "rgba(79, 106, 240, 0.15)" - - if is_success: - color = "#28a745" - bg_color = "#f8fff9" - shadow_color = "rgba(40, 167, 69, 0.15)" - elif is_warning: - color = "#ffc107" - bg_color = "#fffef8" - shadow_color = "rgba(255, 193, 7, 0.15)" - elif is_error: - color = "#dc3545" - bg_color = "#fff8f8" - shadow_color = "rgba(220, 53, 69, 0.15)" - - icon_text = f'{icon}' if icon else "" - - st.markdown( - f""" - - -
-
{icon_text}{title}
-
{content}
-
- """, - unsafe_allow_html=True, - ) - - -def create_metrics_container(metrics_data: list[dict]): - """Crea un contenitore con metriche ben stilizzate.""" - st.markdown( - """ - - """, - unsafe_allow_html=True, - ) - - metrics_html = '
' - for metric in metrics_data: - icon_html = ( - f'
{metric.get("icon", "")}
' - if metric.get("icon") - else "" - ) - unit = metric.get("unit", "") - unit_html = f'{unit}' if unit else "" - help_text = f'title="{metric.get("help")}"' if metric.get("help") else "" - - metrics_html += f""" -
- {icon_html} -
{metric['value']}{unit_html}
-
{metric['label']}
-
- """ - - metrics_html += '
' - st.markdown(metrics_html, unsafe_allow_html=True) diff --git a/view/esecuzione_test.py b/view/esecuzione_test.py deleted file mode 100644 index f7fa14e..0000000 --- a/view/esecuzione_test.py +++ /dev/null @@ -1,168 +0,0 @@ -import streamlit as st - -from controllers.test_controller import execute_llm_test -from controllers.question_set_controller import load_sets -from controllers.api_preset_controller import load_presets -from view.style_utils import add_page_header, add_section_title - - -# === FUNZIONI DI CALLBACK === - -def set_llm_mode_callback(): - """Funzione di callback: imposta la modalità LLM""" - if st.session_state.test_mode != "Valutazione Automatica con LLM": - st.session_state.test_mode = "Valutazione Automatica con LLM" - st.session_state.mode_changed = True - - -def run_llm_test_callback(): - """Funzione di callback: esegue il test LLM""" - st.session_state.run_llm_test = True - - -def render(): - # === Inizializzazione delle variabili di stato === - if 'test_mode' not in st.session_state: - st.session_state.test_mode = "Valutazione Automatica con LLM" - if 'mode_changed' not in st.session_state: - st.session_state.mode_changed = False - if 'run_llm_test' not in st.session_state: - st.session_state.run_llm_test = False - - # Gestisce il cambio di modalità - if st.session_state.mode_changed: - st.session_state.mode_changed = False - st.rerun() - - add_page_header( - "Esecuzione Test", - icon="🧪", - description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." - ) - - # Carica i dati necessari, utilizzando cache e session state - if 'api_presets' not in st.session_state: - st.session_state.api_presets = load_presets() - if 'question_sets' not in st.session_state: - st.session_state.question_sets = load_sets() - - if st.session_state.api_presets.empty: - st.error( - "Nessun preset API configurato. Vai alla pagina 'Gestione Preset API' " - "per crearne almeno uno prima di eseguire i test." - ) - st.stop() - - # Controlla se ci sono set di domande disponibili - if st.session_state.question_sets.empty: - st.warning("Nessun set di domande disponibile. Crea dei set di domande prima di eseguire i test.") - st.stop() - - # Seleziona set di domande per il test - add_section_title("Seleziona Set di Domande", icon="📚") - set_options = {} - if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: - for _, row in st.session_state.question_sets.iterrows(): - if 'questions' in row and row['questions']: - set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" - - if not set_options: - st.warning("Nessun set di domande con domande associate. 
Creane uno in 'Gestione Set di Domande'.") - st.stop() - - selected_set_id = st.selectbox( - "Seleziona un set di domande", - options=list(set_options.keys()), - format_func=lambda x: set_options[x], - key="select_question_set_for_test" - ) - - selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] - questions_in_set = selected_set['questions'] - - # --- Opzioni API basate su Preset --- - add_section_title("Opzioni API basate su Preset", icon="🛠️") - - preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} - preset_display_names = list(preset_names_to_id.keys()) - - def get_preset_config_by_name(name): - preset_id = preset_names_to_id.get(name) - if preset_id: - return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() - return None - - # Seleziona preset per generazione risposta (comune a entrambe le modalità) - generation_preset_name = st.selectbox( - "Seleziona Preset per Generazione Risposta LLM", - options=preset_display_names, - index=0 if preset_display_names else None, # Seleziona il primo di default - key="generation_preset_select", - help="Il preset API utilizzato per generare la risposta alla domanda." - ) - st.session_state.selected_generation_preset_name = generation_preset_name - - # Seleziona preset per valutazione (solo per modalità LLM) - if st.session_state.test_mode == "Valutazione Automatica con LLM": - evaluation_preset_name = st.selectbox( - "Seleziona Preset per Valutazione Risposta LLM", - options=preset_display_names, - index=0 if preset_display_names else None, # Seleziona il primo di default - key="evaluation_preset_select", - help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." - ) - st.session_state.selected_evaluation_preset_name = evaluation_preset_name - - show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) - - # --- Logica di Esecuzione Test --- - test_mode_selected = st.session_state.test_mode - - if test_mode_selected == "Valutazione Automatica con LLM": - st.header("Esecuzione: Valutazione Automatica con LLM") - - # Pulsante che utilizza la funzione di callback - st.button( - "🚀 Esegui Test con LLM", - key="run_llm_test_btn", - on_click=run_llm_test_callback - ) - - # Gestisce l'esecuzione del test - if st.session_state.run_llm_test: - st.session_state.run_llm_test = False # Resetta lo stato - - gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) - eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) - - if not gen_preset_config or not eval_preset_config: - st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") - else: - with st.spinner("Generazione risposte e valutazione LLM in corso..."): - exec_result = execute_llm_test( - selected_set_id, - selected_set['name'], - questions_in_set, - gen_preset_config, - eval_preset_config, - show_api_details=show_api_details, - ) - - if exec_result: - st.session_state.results = exec_result['results_df'] - st.success(f"Test LLM completato! Punteggio medio: {exec_result['avg_score']:.2f}%") - - # Visualizzazione risultati dettagliati - st.subheader("Risultati Dettagliati") - for q_id, result in exec_result['results'].items(): - with st.expander( - f"Domanda: {result['question'][:50]}..." 
- ): - col1, col2 = st.columns(2) - with col1: - st.write("**Domanda:**", result['question']) - st.write("**Risposta Attesa:**", result['expected_answer']) - with col2: - st.write("**Risposta Generata:**", result['actual_answer']) - st.write("**Punteggio:**", f"{result['evaluation']['score']:.1f}%") - st.write("**Valutazione:**", result['evaluation']['explanation']) diff --git a/view/gestione_domande.py b/view/gestione_domande.py deleted file mode 100644 index 8b3eb21..0000000 --- a/view/gestione_domande.py +++ /dev/null @@ -1,295 +0,0 @@ -import streamlit as st -import pandas as pd - -from controllers.question_controller import ( - add_question, - update_question, - delete_question, - import_questions_from_file, - filter_questions_by_category, -) -from services.question_service import load_questions, refresh_questions -from view.style_utils import add_page_header -from view.state_models import QuestionPageState - - -# === FUNZIONI DI CALLBACK === - - -def save_question_action( - question_id, edited_question, edited_answer, edited_category -) -> QuestionPageState: - """Salva le modifiche alla domanda e restituisce lo stato dell'operazione.""" - state = QuestionPageState() - if update_question( - question_id, - domanda=edited_question, - risposta_attesa=edited_answer, - categoria=edited_category, - ): - state.save_success = True - st.session_state.questions = refresh_questions() - state.trigger_rerun = True - else: - state.save_error = True - return state - - -def create_save_question_callback( - question_id, edited_question, edited_answer, edited_category -): - def callback(): - st.session_state.question_page_state = save_question_action( - question_id, edited_question, edited_answer, edited_category - ) - - return callback - - -def delete_question_action(question_id) -> QuestionPageState: - """Elimina la domanda e restituisce lo stato dell'operazione.""" - state = QuestionPageState() - delete_question(question_id) - state.delete_success = True - st.session_state.questions = refresh_questions() - state.trigger_rerun = True - return state - - -def import_questions_action(uploaded_file) -> QuestionPageState: - """Importa le domande da file e restituisce lo stato dell'operazione.""" - state = QuestionPageState() - if uploaded_file is not None: - success, message = import_questions_from_file(uploaded_file) - if success: - state.import_success = True - state.import_success_message = message - st.session_state.questions = refresh_questions() - state.trigger_rerun = True - else: - state.import_error = True - state.import_error_message = message - return state - - -def import_questions_callback(): - uploaded_file = st.session_state.get("uploaded_file_content") - st.session_state.question_page_state = import_questions_action(uploaded_file) - - -# === FUNZIONI DI DIALOGO === - -@st.dialog("Conferma Eliminazione") -def confirm_delete_question_dialog(question_id, question_text): - """Dialogo di conferma per l'eliminazione della domanda""" - st.write("Sei sicuro di voler eliminare questa domanda?") - st.write(f"**Domanda:** {question_text[:100]}...") - st.warning("Questa azione non può essere annullata.") - - col1, col2 = st.columns(2) - - with col1: - if st.button("Sì, Elimina", type="primary", use_container_width=True): - st.session_state.question_page_state = delete_question_action(question_id) - st.rerun() - - with col2: - if st.button("No, Annulla", use_container_width=True): - st.rerun() - - -def render(): - # === Inizializzazione dello stato === - 
st.session_state.setdefault("question_page_state", QuestionPageState()) - state: QuestionPageState = st.session_state.question_page_state - - # Carica le domande utilizzando la cache - st.session_state.questions = load_questions() - - # Gestisce la logica di rerun - if state.trigger_rerun: - state.trigger_rerun = False - st.rerun() - - # Mostra i messaggi di stato - if state.save_success: - st.success(state.save_success_message) - if state.save_error: - st.error(state.save_error_message) - if state.delete_success: - st.success(state.delete_success_message) - if state.add_success: - st.success(state.add_success_message) - if state.import_success: - st.success(state.import_success_message) - if state.import_error: - st.error(state.import_error_message) - - # Resetta lo stato dopo la visualizzazione dei messaggi - st.session_state.question_page_state = QuestionPageState() - - # Aggiungi un'intestazione stilizzata - add_page_header( - "Gestione Domande", - icon="📋", - description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." - ) - - # Scheda per diverse funzioni di gestione delle domande - tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) - - # Scheda Visualizza e Modifica Domande - with tabs[0]: - st.header("Visualizza e Modifica Domande") - - if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df, unique_categories = filter_questions_by_category() - category_options = ["Tutte le categorie"] + unique_categories - - selected_category = st.selectbox( - "Filtra per categoria:", - options=category_options, - index=0 - ) - - if selected_category == "Tutte le categorie": - filtered_questions_df = questions_df - else: - filtered_questions_df, _ = filter_questions_by_category(selected_category) - - if not filtered_questions_df.empty: - for idx, row in filtered_questions_df.iterrows(): - category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' - with st.expander( - f"Domanda: {row['domanda'][:100]}... (Categoria: {category_display})" - ): - col1, col2 = st.columns([3, 1]) - - with col1: - edited_question = st.text_area( - f"Modifica Domanda {idx + 1}", - value=row['domanda'], - key=f"q_edit_{row['id']}" - ) - - edited_answer = st.text_area( - f"Modifica Risposta Attesa {idx + 1}", - value=row['risposta_attesa'], - key=f"a_edit_{row['id']}" - ) - - edited_category_value = row.get('categoria', '') - edited_category = st.text_input( - f"Modifica Categoria {idx + 1}", - value=edited_category_value, - key=f"c_edit_{row['id']}" - ) - - with col2: - st.button( - "Salva Modifiche", - key=f"save_{row['id']}", - on_click=create_save_question_callback( - row['id'], edited_question, edited_answer, edited_category - ), - ) - - if st.button( - "Elimina Domanda", - key=f"delete_{row['id']}", - type="secondary" - ): - confirm_delete_question_dialog(row['id'], row['domanda']) - else: - st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") - - else: - st.info("Nessuna domanda disponibile. 
Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") - - # Scheda Aggiungi Domande - with tabs[1]: - st.header("Aggiungi Nuova Domanda") - - with st.form("add_question_form"): - domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") - risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") - categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") - - submitted = st.form_submit_button("Aggiungi Domanda") - - if submitted: - if domanda and risposta_attesa: - # Passa la categoria, che può essere una stringa vuota se non inserita - question_id = add_question( - domanda=domanda, - risposta_attesa=risposta_attesa, - categoria=categoria, - ) - state = QuestionPageState() - state.add_success = True - state.add_success_message = ( - f"Domanda aggiunta con successo con ID: {question_id}" - ) - state.trigger_rerun = True - st.session_state.question_page_state = state - st.session_state.questions = refresh_questions() - st.rerun() - else: - st.error("Sono necessarie sia la domanda che la risposta attesa.") - - # Scheda Importa da File - with tabs[2]: - st.header("Importa Domande da File") - - st.write(""" - Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). - - ### Formato File: - - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. - Può includere opzionalmente 'categoria'. - (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). - - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. - Può includere opzionalmente 'categoria'. - (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). - - ### Esempio CSV: - ```csv - domanda,risposta_attesa,categoria - "Quanto fa 2+2?","4","Matematica Base" - "Qual è la capitale della Francia?","Parigi","Geografia" - "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" - ``` - - ### Esempio JSON: - ```json - [ - { - "domanda": "Quanto fa 2+2?", - "risposta_attesa": "4", - "categoria": "Matematica Base" - }, - { - "domanda": "Qual è la capitale della Francia?", - "risposta_attesa": "Parigi", - "categoria": "Geografia" - }, - { - "domanda": "Chi ha scritto 'Romeo e Giulietta'?", - "risposta_attesa": "William Shakespeare" - } - ] - ``` - """) - - uploaded_file = st.file_uploader("Scegli un file", type=["csv", "json"]) - - if uploaded_file is not None: - # Salva il file in session_state per l'uso da parte della callback - st.session_state.uploaded_file_content = uploaded_file - - # Pulsante che utilizza la funzione di callback - st.button( - "Importa Domande", - key="import_questions_btn", - on_click=import_questions_callback - ) diff --git a/view/gestione_set.py b/view/gestione_set.py deleted file mode 100644 index a8dc264..0000000 --- a/view/gestione_set.py +++ /dev/null @@ -1,396 +0,0 @@ -import streamlit as st -from controllers.question_set_controller import ( - create_set, - load_sets, -) -from services.question_service import load_questions -from view.style_utils import add_page_header, add_global_styles -from view.state_models import SetPageState -from view.set_helpers import ( - confirm_delete_set_dialog, - import_set_callback, - get_question_text, - get_question_category, - mark_expander_open, - create_save_set_callback, -) - - -def render(): - add_global_styles() - - st.session_state.setdefault("set_page_state", SetPageState()) - state: SetPageState = st.session_state.set_page_state 
- - st.session_state.setdefault("question_checkboxes", {}) - st.session_state.setdefault("newly_selected_questions", {}) - st.session_state.setdefault("set_expanders", {}) - - if state.trigger_rerun: - state.trigger_rerun = False - st.rerun() - - if state.save_set_success: - st.success(state.save_set_success_message) - state.save_set_success = False - - if state.save_set_error: - st.error(state.save_set_error_message) - state.save_set_error = False - - if state.delete_set_success: - st.success(state.delete_set_success_message) - state.delete_set_success = False - - if state.create_set_success: - st.success(state.create_set_success_message) - state.create_set_success = False - - if state.import_set_success: - st.success(state.import_set_success_message) - state.import_set_success = False - - if state.import_set_error: - st.error(state.import_set_error_message) - state.import_set_error = False - - # Inizializza i dati utilizzando la cache - if 'questions' not in st.session_state: - st.session_state.questions = load_questions() - if 'question_sets' not in st.session_state: - st.session_state.question_sets = load_sets() - - # Assicurati che esista lo stato degli expander per ogni set - if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: - current_set_ids = st.session_state.question_sets['id'].tolist() - # Rimuovi stati per set non più presenti - for sid in list(st.session_state.set_expanders.keys()): - if sid not in current_set_ids: - del st.session_state.set_expanders[sid] - # Aggiungi stato predefinito per nuovi set - for sid in current_set_ids: - st.session_state.set_expanders.setdefault(sid, False) - - # Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN - if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df_temp = st.session_state.questions - if 'categoria' not in questions_df_temp.columns: - questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante - questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN - st.session_state.questions = questions_df_temp - - # Aggiungi un'intestazione stilizzata - add_page_header( - "Gestione Set di Domande", - icon="📚", - description="Organizza le tue domande in set per test e valutazioni" - ) - - # Schede per diverse funzioni di gestione dei set - tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) - - # Scheda Visualizza e Modifica Set - with tabs[0]: - st.header("Visualizza e Modifica Set di Domande") - - questions_ready = ('questions' in st.session_state and - not st.session_state.questions.empty and - 'domanda' in st.session_state.questions.columns and - 'categoria' in st.session_state.questions.columns) - sets_ready = 'question_sets' in st.session_state - - if not questions_ready: - st.warning( - "Dati delle domande (incluse categorie) non completamente caricati. " - "Alcune funzionalità potrebbero essere limitate. Vai a 'Gestione Domande'." - ) - # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti - unique_categories_for_filter = [] - selected_categories = [] - else: - questions_df = st.session_state.questions - # Ottieni categorie uniche per il filtro, escludendo 'N/A' - # se si preferisce non mostrarlo come opzione selezionabile - # o gestendolo specificamente. Per ora, includiamo tutto. 
- unique_categories_for_filter = sorted( - list(questions_df['categoria'].astype(str).unique()) - ) - if not unique_categories_for_filter: - st.info( - "Nessuna categoria definita nelle domande esistenti per poter filtrare." - ) - - selected_categories = st.multiselect( - "Filtra per categorie (mostra i set che contengono almeno una domanda da " - "OGNI categoria selezionata):", - options=unique_categories_for_filter, - default=[], - key="filter_categories", - ) - - if sets_ready and not st.session_state.question_sets.empty: - question_sets_df = st.session_state.question_sets - display_sets_df = question_sets_df.copy() # Inizia con tutti i set - - if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti - filtered_set_indices = [] - for idx, set_row in question_sets_df.iterrows(): - question_ids_in_set = set_row.get('questions', []) - if not isinstance(question_ids_in_set, list): - question_ids_in_set = [] - - if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro - continue - - categories_present_in_set = set() - for q_id in question_ids_in_set: - category = get_question_category(str(q_id), questions_df) - categories_present_in_set.add(category) - - # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata - if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): - filtered_set_indices.append(idx) - - display_sets_df = question_sets_df.loc[filtered_set_indices] - - if display_sets_df.empty and selected_categories: - st.info( - "Nessun set trovato che contenga domande da tutte le categorie selezionate: " - f"{', '.join(selected_categories)}." - ) - elif display_sets_df.empty and not selected_categories: - st.info( - "Nessun set di domande disponibile. Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'." 
- ) - - for idx, row in display_sets_df.iterrows(): - exp_key = f"set_expander_{row['id']}" - if exp_key not in st.session_state.set_expanders: - st.session_state.set_expanders[exp_key] = False - - with st.expander( - f"Set: {row['name']}", - expanded=st.session_state.set_expanders.get(exp_key, False), - ): - col1, col2 = st.columns([3, 1]) - - with col1: - _ = st.text_input( - "Nome Set", - value=row['name'], - key=f"set_name_{row['id']}", - on_change=mark_expander_open, - args=(exp_key,) - ) - - st.subheader("Domande in questo Set") - current_question_ids_in_set = row.get('questions', []) - if not isinstance(current_question_ids_in_set, list): - current_question_ids_in_set = [] - - if row['id'] not in st.session_state.question_checkboxes: - st.session_state.question_checkboxes[row['id']] = {} - - if current_question_ids_in_set: - for q_id in current_question_ids_in_set: - q_text = get_question_text(str(q_id)) - q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' - display_text = f"{q_text} (Categoria: {q_cat})" - - # Usa una callback per aggiornare lo stato della checkbox - checkbox_value = st.checkbox( - display_text, - value=True, - key=f"qcheck_{row['id']}_{q_id}", - on_change=mark_expander_open, - args=(exp_key,) - ) - st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value - else: - st.info("Nessuna domanda in questo set.") - - st.subheader("Aggiungi Domande al Set") - - # Inizializza lo stato delle domande appena selezionate - if row['id'] not in st.session_state.newly_selected_questions: - st.session_state.newly_selected_questions[row['id']] = [] - - if questions_ready: - all_questions_df = st.session_state.questions - available_questions_df = all_questions_df[ - ~all_questions_df['id'].astype(str).isin( - [str(q_id) for q_id in current_question_ids_in_set] - ) - ] - - if not available_questions_df.empty: - question_dict_for_multiselect = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" - for q_id, q_text in zip( - available_questions_df['id'].astype(str), - available_questions_df['domanda'], - ) - } - newly_selected_questions_ids = st.multiselect( - "Seleziona domande da aggiungere", - options=list(question_dict_for_multiselect.keys()), - format_func=lambda x: question_dict_for_multiselect.get(x, x), - key=f"add_q_{row['id']}", - on_change=mark_expander_open, - args=(exp_key,) - ) - st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids - else: - st.info("Nessuna altra domanda disponibile da aggiungere.") - else: - st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") - - with col2: - st.button( - "Salva Modifiche", - key=f"save_set_{row['id']}", - on_click=create_save_set_callback(row['id'], exp_key, state) - ) - - # Pulsante Elimina con dialog di conferma - if st.button( - "Elimina Set", - key=f"delete_set_{row['id']}", - type="secondary" - ): - mark_expander_open(exp_key) - confirm_delete_set_dialog(row['id'], row['name'], state) - - # Lo stato dell'expander viene aggiornato tramite i callback - - elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): - st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") - - # Scheda Crea Nuovo Set - with tabs[1]: - st.header("Crea Nuovo Set di Domande") - - with st.form("create_set_form"): - set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") - - selected_qs_for_new_set = [] - questions_ready_for_creation = ( - 'questions' in st.session_state and - not st.session_state.questions.empty and - 'domanda' in st.session_state.questions.columns and - 'categoria' in st.session_state.questions.columns - ) - - if questions_ready_for_creation: - all_questions_df_creation = st.session_state.questions - question_dict_for_creation = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" - for q_id, q_text in zip( - all_questions_df_creation['id'].astype(str), - all_questions_df_creation['domanda'], - ) - } - - selected_qs_for_new_set = st.multiselect( - "Seleziona domande per questo set", - options=list(question_dict_for_creation.keys()), - format_func=lambda x: question_dict_for_creation.get(x, x), - key="create_set_questions", - ) - else: - st.info( - "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). ", - "Vai a 'Gestione Domande' per aggiungere/caricare domande." - ) - - submitted = st.form_submit_button("Crea Set") - - if submitted: - if set_name: - set_id = create_set( - set_name, [str(q_id) for q_id in selected_qs_for_new_set] - ) - state.create_set_success_message = ( - f"Set di domande creato con successo con ID: {set_id}" - ) - state.create_set_success = True - state.trigger_rerun = True - st.rerun() - else: - st.error("Il nome del set è obbligatorio.") - - # Scheda Importa da File - with tabs[2]: - st.header("Importa Set da File") - - st.write(""" - Carica un file JSON o CSV contenente uno o più set di domande. - - ### Formato File JSON per Set Multipli: - ```json - [ - { - "name": "Capitali", - "questions": [ - { - "id": "1", - "domanda": "Qual è la capitale della Francia?", - "risposta_attesa": "Parigi", - "categoria": "Geografia" - }, - { - "id": "2", - "domanda": "Qual è la capitale della Germania?", - "risposta_attesa": "Berlino", - "categoria": "Geografia" - } - ] - }, - { - "name": "Matematica Base", - "questions": [ - { - "id": "3", - "domanda": "Quanto fa 2+2?", - "risposta_attesa": "4", - "categoria": "Matematica" - }, - { - "id": "4", - "domanda": "Quanto fa 10*4?", - "risposta_attesa": "40", - "categoria": "Matematica" - } - ] - } - ] - ``` - - ### Formato CSV: - Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` - (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e - ``categoria``. 
- ```csv - name,id,domanda,risposta_attesa,categoria - Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia - Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia - Matematica Base,3,Quanto fa 2+2?,4,Matematica - Matematica Base,4,Quanto fa 10*4?,40,Matematica - ``` - - ### Note Importanti: - - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente - - Se un set con lo stesso nome esiste già, verrà saltato - - Solo le domande nuove verranno aggiunte al database - - Le domande esistenti verranno referenziate nei nuovi set - """) - - uploaded_file = st.file_uploader("Scegli un file", type=["json", "csv"]) - - if uploaded_file is not None: - st.session_state.uploaded_file_content_set = uploaded_file - st.button( - "Importa Set", - key="import_set_btn", - on_click=lambda: import_set_callback(state) - ) diff --git a/view/home.py b/view/home.py deleted file mode 100644 index 5538c4d..0000000 --- a/view/home.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Home page view module for the Streamlit application.""" - -import streamlit as st -from .style_utils import add_home_styles - - -def render(): - """Visualizza la pagina principale con le funzionalità della piattaforma.""" - - add_home_styles() - - st.markdown( - """ -
-    🧠 Piattaforma di Valutazione LLM
-    Una piattaforma completa per valutare le risposte LLM con diversi provider AI
-""", - unsafe_allow_html=True, - ) - - # Box delle funzionalità con icone e stile migliorato - col1, col2 = st.columns(2) - - with col1: - st.markdown( - """ -
-    📋 Gestione delle Domande
-    Crea, modifica e organizza le tue domande di test con le risposte previste. Costruisci set di test completi per valutare le risposte LLM in modo efficiente.
-    🔌 Supporto Multi-Provider API
-    Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello. Configura parametri API e verifica le connessioni con feedback in tempo reale.
- """, - unsafe_allow_html=True, - ) - - with col2: - st.markdown( - """ -
-    🧪 Valutazione Automatizzata
-    Esegui test con punteggio automatico rispetto alle risposte previste. Valuta la somiglianza semantica tra testi con modelli linguistici.
-    📊 Analisi Avanzata
-    Visualizza i risultati dei test con grafici interattivi e metriche dettagliate. Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici.
- """, - unsafe_allow_html=True, - ) - - st.markdown( - """ -
-    🚀 Iniziare
-    1. Configura le tue credenziali API nella pagina Configurazione API
-    2. Crea domande e risposte previste nella pagina Gestione Domande
-    3. Organizza le domande in set nella pagina Gestione Set di Domande
-    4. Esegui valutazioni nella pagina Esecuzione Test
-    5. Visualizza e analizza i risultati nella pagina Visualizzazione Risultati
-    Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.
-""", - unsafe_allow_html=True, - ) diff --git a/view/session_state.py b/view/session_state.py deleted file mode 100644 index 6e99fe8..0000000 --- a/view/session_state.py +++ /dev/null @@ -1,30 +0,0 @@ -import streamlit as st - -from controllers.startup_controller import get_initial_state - - -def ensure_keys(defaults: dict) -> None: - """Garantisce la presenza delle chiavi in ``st.session_state``. - - Parametri: - defaults: Dizionario con chiavi e valori da impostare se mancanti. - """ - for key, value in defaults.items(): - st.session_state.setdefault(key, value) - - -def initialize_session_state() -> None: - """Inizializza ``st.session_state`` con i valori di default.""" - required_keys = [ - "questions", - "question_sets", - "results", - "api_key", - "endpoint", - "model", - "temperature", - "max_tokens", - ] - if any(key not in st.session_state for key in required_keys): - defaults = get_initial_state() - ensure_keys(defaults) diff --git a/view/set_helpers.py b/view/set_helpers.py deleted file mode 100644 index 4cbf8ce..0000000 --- a/view/set_helpers.py +++ /dev/null @@ -1,136 +0,0 @@ -import streamlit as st - -from controllers.question_set_controller import ( - update_set, - delete_set, - import_sets_from_file, - refresh_question_sets, -) -from services.question_service import refresh_questions -from .state_models import SetPageState - - -def save_set_callback( - set_id: str, - edited_name: str, - question_options_checkboxes: dict, - newly_selected_questions_ids: list[str], - state: SetPageState, -) -> None: - kept_questions_ids = [q_id for q_id, keep in question_options_checkboxes.items() if keep] - updated_questions_ids = list( - set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids]) - ) - - update_set(set_id, edited_name, updated_questions_ids) - state.save_set_success_message = "Set di domande aggiornato con successo!" - state.save_set_success = True - st.session_state.question_sets = refresh_question_sets() - state.trigger_rerun = True - - -def delete_set_callback(set_id: str, state: SetPageState): - delete_set(set_id) - state.delete_set_success_message = "Set di domande eliminato con successo!" 
- state.delete_set_success = True - st.session_state.question_sets = refresh_question_sets() - state.trigger_rerun = True - - -@st.dialog("Conferma Eliminazione") -def confirm_delete_set_dialog(set_id: str, set_name: str, state: SetPageState): - """Dialog di conferma per l'eliminazione del set di domande""" - st.write(f"Sei sicuro di voler eliminare il set '{set_name}'?") - st.warning("Questa azione non può essere annullata.") - - col1, col2 = st.columns(2) - - with col1: - if st.button("Sì, Elimina", type="primary", use_container_width=True): - delete_set_callback(set_id, state) - st.rerun() - - with col2: - if st.button("No, Annulla", use_container_width=True): - st.rerun() - - -def import_set_callback(state: SetPageState): - """Importa uno o più set di domande da file JSON o CSV.""" - - state.import_set_success = False - state.import_set_error = False - state.import_set_success_message = "" - state.import_set_error_message = "" - - uploaded_file = st.session_state.get("uploaded_file_content_set") - result = import_sets_from_file(uploaded_file) - - if result["success"]: - state.import_set_success = True - state.import_set_success_message = result["success_message"] - st.session_state.questions = refresh_questions() - st.session_state.question_sets = refresh_question_sets() - st.session_state.uploaded_file_content_set = None - else: - state.import_set_error = True - state.import_set_error_message = result["error_message"] - - for warn in result.get("warnings", []): - st.warning(warn) - - state.trigger_rerun = True - - -def get_question_text(question_id: str) -> str: - """Ritorna il testo della domanda dato il suo ID.""" - if "questions" in st.session_state and not st.session_state.questions.empty: - if "domanda" not in st.session_state.questions.columns: - st.session_state.questions = refresh_questions() - if "domanda" not in st.session_state.questions.columns: - return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" - - question_row = st.session_state.questions[st.session_state.questions["id"] == str(question_id)] - if not question_row.empty: - return question_row.iloc[0]["domanda"] - return f"ID Domanda: {question_id} (non trovata o dati non caricati)" - - -def get_question_category(question_id: str, questions_df): - """Ritorna la categoria di una domanda dato il suo ID.""" - if questions_df is not None and not questions_df.empty and "categoria" in questions_df.columns: - question_row = questions_df[questions_df["id"] == str(question_id)] - if not question_row.empty: - return question_row.iloc[0]["categoria"] - return "N/A" - - -def mark_expander_open(exp_key: str): - """Segna l'expander come aperto nello stato di sessione""" - if "set_expanders" in st.session_state: - st.session_state.set_expanders[exp_key] = True - - -def create_save_set_callback(set_id: str, exp_key: str, state: SetPageState): - def callback(): - mark_expander_open(exp_key) - edited_name = st.session_state.get(f"set_name_{set_id}", "") - question_options_checkboxes = st.session_state.question_checkboxes.get(set_id, {}) - newly_selected_questions_ids = st.session_state.newly_selected_questions.get(set_id, []) - - save_set_callback( - set_id, - edited_name, - question_options_checkboxes, - newly_selected_questions_ids, - state, - ) - - return callback - - -def create_delete_set_callback(set_id: str, state: SetPageState): - def callback(): - delete_set_callback(set_id, state) - - return callback diff --git a/view/state_models.py b/view/state_models.py deleted file mode 100644 index 4bceddf..0000000 --- 
a/view/state_models.py +++ /dev/null @@ -1,47 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class SetPageState: - """Transient UI state for the question set management page.""" - - save_set_success: bool = False - save_set_success_message: str = "Set aggiornato con successo!" - save_set_error: bool = False - save_set_error_message: str = "Errore durante l'aggiornamento del set." - - delete_set_success: bool = False - delete_set_success_message: str = "Set eliminato con successo!" - - create_set_success: bool = False - create_set_success_message: str = "Set creato con successo!" - - import_set_success: bool = False - import_set_success_message: str = "Importazione completata con successo!" - import_set_error: bool = False - import_set_error_message: str = "Errore durante l'importazione." - - trigger_rerun: bool = False - - -@dataclass -class QuestionPageState: - """Transient UI state for the question management page.""" - - save_success: bool = False - save_success_message: str = "Domanda aggiornata con successo!" - save_error: bool = False - save_error_message: str = "Impossibile aggiornare la domanda." - - delete_success: bool = False - delete_success_message: str = "Domanda eliminata con successo!" - - add_success: bool = False - add_success_message: str = "Domanda aggiunta con successo!" - - import_success: bool = False - import_success_message: str = "Importazione completata con successo!" - import_error: bool = False - import_error_message: str = "Errore durante l'importazione." - - trigger_rerun: bool = False diff --git a/view/style_utils.py b/view/style_utils.py deleted file mode 100644 index 0a78ba5..0000000 --- a/view/style_utils.py +++ /dev/null @@ -1,372 +0,0 @@ -"""Funzioni di utilità per applicare stili CSS nelle viste Streamlit. - -Centralizza l'iniezione di CSS per favorirne il riuso tra le pagine. -""" - -import streamlit as st - - -def add_global_styles(): - """Aggiunge stili globali all'applicazione.""" - st.markdown( - """ - - """, - unsafe_allow_html=True, - ) - - -def add_page_header(title: str, icon: str = "💡", description: str | None = None): - """Aggiunge un'intestazione di pagina stilizzata.""" - add_global_styles() - st.markdown( - """ - - """, - unsafe_allow_html=True, - ) - - st.markdown( - f""" - -
- """, - unsafe_allow_html=True, - ) - - -def add_section_title(title: str, icon: str | None = None): - """Aggiunge un titolo di sezione stilizzato.""" - icon_text = f"{icon} " if icon else "" - st.markdown( - f"
{icon_text}{title}
", - unsafe_allow_html=True, - ) - - -def add_home_styles(): - """Applica gli stili CSS specifici della home page. - - Migliora la visibilità degli input nei temi chiaro e scuro e definisce - l'aspetto degli elementi principali come box funzionali e sezioni di - benvenuto. - """ - st.markdown( - """ - - """, - unsafe_allow_html=True, - ) diff --git a/view/ui_utils.py b/view/ui_utils.py deleted file mode 100644 index 83fbd66..0000000 --- a/view/ui_utils.py +++ /dev/null @@ -1,10 +0,0 @@ -from .style_utils import add_global_styles, add_page_header, add_section_title -from .component_utils import create_card, create_metrics_container - -__all__ = [ - "add_global_styles", - "add_page_header", - "add_section_title", - "create_card", - "create_metrics_container", -] diff --git a/view/visualizza_risultati.py b/view/visualizza_risultati.py deleted file mode 100644 index 96424f1..0000000 --- a/view/visualizza_risultati.py +++ /dev/null @@ -1,465 +0,0 @@ -import streamlit as st -import pandas as pd -import json -import plotly.express as px -import plotly.graph_objects as go - -from controllers.test_controller import ( - import_results_from_file, - load_results, - refresh_results, - calculate_statistics, -) -from controllers.question_set_controller import load_sets -from controllers.api_preset_controller import load_presets -from view.style_utils import add_page_header, add_section_title - - -def render(): - add_page_header( - "Visualizzazione Risultati Test", - icon="📊", - description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." - ) - - # Carica i risultati utilizzando la cache - if 'results' not in st.session_state: - st.session_state.results = load_results() - if st.session_state.results.empty: - st.warning("Nessun risultato di test disponibile. 
Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") - st.stop() - - # Carica i set di domande utilizzando la cache - if 'question_sets' not in st.session_state: - st.session_state.question_sets = load_sets() - - # Carica i preset API utilizzando la cache - if 'api_presets' not in st.session_state: - st.session_state.api_presets = load_presets() - - # Stato per messaggi di importazione risultati - if 'import_results_success' not in st.session_state: - st.session_state.import_results_success = False - if 'import_results_error' not in st.session_state: - st.session_state.import_results_error = False - if 'import_results_message' not in st.session_state: - st.session_state.import_results_message = "" - - if st.session_state.import_results_success: - st.success(st.session_state.import_results_message) - st.session_state.import_results_success = False - if st.session_state.import_results_error: - st.error(st.session_state.import_results_message) - st.session_state.import_results_error = False - - def get_set_name(set_id): - if not st.session_state.question_sets.empty: - set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] - if not set_info.empty: - return set_info.iloc[0]['name'] - return "Set Sconosciuto" - - def get_model_from_preset_name(preset_name): - """Restituisce il modello associato a un preset, se disponibile.""" - if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: - preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] - if not preset_row.empty: - return preset_row.iloc[0]['model'] - return "Sconosciuto" - - def import_results_callback(): - """Callback per importare risultati da file JSON.""" - if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: - success, message = import_results_from_file(st.session_state.uploaded_results_file) - st.session_state.import_results_message = message - st.session_state.import_results_success = success - st.session_state.import_results_error = not success - if success: - st.session_state.results = refresh_results() - st.session_state.uploaded_results_file = None - - # Filtri per Set e Modello LLM - all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) - - all_model_names = sorted({ - r['results']['generation_llm'] - for _, r in st.session_state.results.iterrows() - if r['results'].get('generation_llm') - }) - - selected_set_filter = st.selectbox( - "Filtra per Set", - options=["Tutti"] + all_set_names, - index=0, - key="filter_set_name" - ) - - selected_model_filter = st.selectbox( - "Filtra per Modello LLM", - options=["Tutti"] + all_model_names, - index=0, - key="filter_model_name" - ) - - filtered_results_df = st.session_state.results - if selected_set_filter != "Tutti": - set_ids = st.session_state.question_sets[ - st.session_state.question_sets['name'] == selected_set_filter - ]['id'].astype(str) - filtered_results_df = filtered_results_df[ - filtered_results_df['set_id'].astype(str).isin(set_ids) - ] - - if selected_model_filter != "Tutti": - filtered_results_df = filtered_results_df[ - filtered_results_df['results'].apply( - lambda res: res.get('generation_llm') == selected_model_filter - ) - ] - - # Elabora i risultati per la visualizzazione nel selectbox - processed_results_for_select = [] - for _, row in filtered_results_df.iterrows(): - result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli - 
set_name = get_set_name(row['set_id']) - avg_score = result_data.get('avg_score', 0) - method = result_data.get('method', 'N/A') - method_icon = "🤖" if method == "LLM" else "📊" - - processed_results_for_select.append( - { - 'id': row['id'], - 'display_name': ( - f"{row['timestamp']} - {method_icon} {set_name} " - f"(Avg: {avg_score:.2f}%) - {method}" - ), - } - ) - - processed_results_for_select.sort( - key=lambda x: x['display_name'].split(' - ')[0], - reverse=True, - ) # Ordina per timestamp - - result_options = {r['id']: r['display_name'] for r in processed_results_for_select} - - # Seleziona il risultato da visualizzare - selected_result_id = st.selectbox( - "Seleziona un Risultato del Test da Visualizzare", - options=list(result_options.keys()), - format_func=lambda x: result_options[x], - index=0 if result_options else None, - key="select_test_result_to_view" - ) - - # Opzionalmente seleziona un secondo risultato per il confronto - # Rimuove l'opzione del risultato attualmente selezionato per evitare di confrontare il test con se stesso - compare_options = [rid for rid in result_options.keys() if rid != selected_result_id] - compare_result_id = st.selectbox( - "Confronta con un altro risultato (opzionale)", - options=[None] + compare_options, - format_func=lambda x: "Nessun confronto" if x is None else result_options[x], - index=0, - key="select_test_result_compare" - ) - if not selected_result_id: - st.info("Nessun risultato selezionato o disponibile.") - st.stop() - - # Ottieni i dati del risultato selezionato - selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] - result_data = selected_result_row['results'] - set_name = get_set_name(selected_result_row['set_id']) - questions_results = result_data.get('questions', {}) - - with st.expander("Esporta/Importa Risultati"): - col_exp, col_imp = st.columns(2) - with col_exp: - selected_json = json.dumps({ - 'id': selected_result_row['id'], - 'set_id': selected_result_row['set_id'], - 'timestamp': selected_result_row['timestamp'], - 'results': result_data - }, indent=2) - st.download_button( - "Export Risultato Selezionato", - selected_json, - file_name=f"result_{selected_result_row['id']}.json", - mime="application/json" - ) - - all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2) - st.download_button( - "Export Tutti i Risultati", - all_json, - file_name="all_results.json", - mime="application/json" - ) - - with col_imp: - uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results") - if uploaded_file is not None: - st.session_state.uploaded_results_file = uploaded_file - st.button( - "Importa Risultati", - on_click=import_results_callback, - key="import_results_btn" - ) - - # Carica eventuale risultato di confronto - compare_result_row = None - compare_result_data = None - compare_questions_results = {} - if compare_result_id: - compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] - compare_result_data = compare_result_row['results'] - compare_questions_results = compare_result_data.get('questions', {}) - - # Visualizza informazioni generali sul risultato - evaluation_method = result_data.get('method', 'LLM') - method_icon = "🤖" if evaluation_method == "LLM" else "📊" - method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto" - - add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄") - 
st.markdown(f"**ID Risultato:** `{selected_result_id}`") - st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}") - st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**") - - if 'generation_llm' in result_data: - st.markdown(f"**LLM Generazione Risposte:** `{result_data['generation_llm']}`") - elif 'generation_preset' in result_data: - st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") - if evaluation_method == "LLM": - if 'evaluation_llm' in result_data: - st.markdown(f"**LLM Valutazione Risposte:** `{result_data['evaluation_llm']}`") - elif 'evaluation_preset' in result_data: - st.markdown( - f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`" - ) - - # Metriche Generali del Test - add_section_title("Metriche Generali del Test", icon="📈") - - if questions_results: - stats = calculate_statistics(questions_results) - avg_score_overall = stats["avg_score"] - num_questions = len(stats["per_question_scores"]) - - cols_metrics = st.columns(2) - with cols_metrics[0]: - st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%") - with cols_metrics[1]: - st.metric("Numero di Domande Valutate", num_questions) - - compare_stats = None - if compare_result_row is not None: - compare_stats = calculate_statistics(compare_questions_results) - compare_avg = compare_stats["avg_score"] - diff_avg = compare_avg - avg_score_overall - st.markdown("### Confronto") - cols_cmp = st.columns(3) - cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%") - cols_cmp[1].metric("Punteggio Confronto", f"{compare_avg:.2f}%") - cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%") - - scores_data = [] - for item in stats["per_question_scores"]: - label = item["question"] - label = label[:50] + "..." if len(label) > 50 else label - scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Selezionato"}) - if compare_stats: - for item in compare_stats["per_question_scores"]: - label = item["question"] - label = label[:50] + "..." 
if len(label) > 50 else label - scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Confronto"}) - - if scores_data: - df_scores = pd.DataFrame(scores_data) - unique_questions = len({d['Domanda'] for d in scores_data}) - fig = px.bar( - df_scores, - x='Domanda', - y='Punteggio', - color='Tipo', - barmode='group', - title="Punteggi per Domanda", - height=max(400, unique_questions * 30), - ) - fig.update_layout(yaxis_range=[0, 100]) - st.plotly_chart(fig, use_container_width=True) - - if evaluation_method == "LLM": - categories = ['Somiglianza', 'Correttezza', 'Completezza'] - fig_radar = go.Figure() - rm = stats["radar_metrics"] - fig_radar.add_trace( - go.Scatterpolar( - r=[rm['similarity'], rm['correctness'], rm['completeness']], - theta=categories, - fill='toself', - name='Selezionato', - ) - ) - if compare_stats: - crm = compare_stats["radar_metrics"] - fig_radar.add_trace( - go.Scatterpolar( - r=[crm['similarity'], crm['correctness'], crm['completeness']], - theta=categories, - fill='toself', - name='Confronto', - ) - ) - fig_radar.update_layout( - title="Grafico Radar delle Metriche LLM", - polar=dict(radialaxis=dict(visible=True, range=[0, 100])), - showlegend=True, - legend=dict( - orientation="h", - yanchor="bottom", - y=-0.2, - xanchor="center", - x=0.5, - ), - height=600, - ) - st.plotly_chart(fig_radar, use_container_width=True) - - st.subheader("Valori medi delle metriche") - cols = st.columns(3) - cols[0].metric("Somiglianza", f"{rm['similarity']:.2f}%") - cols[1].metric("Correttezza", f"{rm['correctness']:.2f}%") - cols[2].metric("Completezza", f"{rm['completeness']:.2f}%") - - if compare_stats: - cols_cmp = st.columns(3) - cols_cmp[0].metric("Somiglianza (Confronto)", f"{crm['similarity']:.2f}%") - cols_cmp[1].metric("Correttezza (Confronto)", f"{crm['correctness']:.2f}%") - cols_cmp[2].metric("Completezza (Confronto)", f"{crm['completeness']:.2f}%") - else: - st.info("Nessun dettaglio per le domande disponibile in questo risultato.") - - if compare_result_row is not None: - add_section_title("Confronto Dettagliato per Domanda", icon="🔍") - comparison_rows = [] - all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys()) - for qid in all_q_ids: - q1 = questions_results.get(qid, {}) - q2 = compare_questions_results.get(qid, {}) - label = q1.get('question') or q2.get('question') or str(qid) - score1 = q1.get('evaluation', {}).get('score', None) - score2 = q2.get('evaluation', {}).get('score', None) - delta = None - if score1 is not None and score2 is not None: - delta = score2 - score1 - comparison_rows.append({ - 'Domanda': label[:50] + ('...' if len(label) > 50 else ''), - 'Selezionato': score1, - 'Confronto': score2, - 'Delta': delta - }) - if comparison_rows: - df_comp = pd.DataFrame(comparison_rows) - st.dataframe(df_comp) - - # Dettagli per ogni domanda - add_section_title("Risultati Dettagliati per Domanda", icon="📝") - if not questions_results: - st.info("Nessuna domanda trovata in questo set di risultati.") - else: - for q_id, q_data in questions_results.items(): - question_text = q_data.get('question', "Testo domanda non disponibile") - expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile") - actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile") - - with st.expander( - f"Domanda: {question_text[:100]}..." 
- ): - st.markdown(f"**Domanda:** {question_text}") - st.markdown(f"**Risposta Attesa:** {expected_answer}") - st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") - st.divider() - - # Mostra Dettagli API di Generazione (se presenti e richiesti) - generation_api_details = q_data.get('generation_api_details') - if generation_api_details and isinstance(generation_api_details, dict): - with st.container(): - st.markdown("###### Dettagli Chiamata API di Generazione Risposta") - if generation_api_details.get('request'): - st.caption("Richiesta API Generazione:") - st.json( - generation_api_details['request'], expanded=False - ) - if generation_api_details.get('response_content'): - st.caption("Contenuto Risposta API Generazione:") - # Prova a formattare se è una stringa JSON, altrimenti mostra com'è - try: - content = generation_api_details['response_content'] - if isinstance(content, str): - response_data_gen = json.loads(content) - else: - response_data_gen = content - st.code( - json.dumps(response_data_gen, indent=2), - language="json", - ) - except Exception: - st.text( - generation_api_details['response_content'] - ) - if generation_api_details.get('error'): - st.caption("Errore API Generazione:") - st.error(generation_api_details['error']) - st.divider() - - if evaluation_method == "LLM": - evaluation = q_data.get( - 'evaluation', {} - ) # Assicurati che evaluation sia sempre un dizionario - st.markdown("##### Valutazione LLM") - score = evaluation.get('score', 0) - explanation = evaluation.get( - 'explanation', "Nessuna spiegazione." - ) - similarity = evaluation.get('similarity', 0) - correctness = evaluation.get('correctness', 0) - completeness = evaluation.get('completeness', 0) - - st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") - st.markdown(f"**Spiegazione:** {explanation}") - - cols_eval_metrics = st.columns(3) - cols_eval_metrics[0].metric( - "Somiglianza", f"{similarity:.2f}%" - ) - cols_eval_metrics[1].metric( - "Correttezza", f"{correctness:.2f}%" - ) - cols_eval_metrics[2].metric( - "Completezza", f"{completeness:.2f}%" - ) - - api_details = evaluation.get('api_details') - if api_details and isinstance(api_details, dict): - with st.container(): # Sostituisce l'expander interno - st.markdown( - "###### Dettagli Chiamata API di Valutazione" - ) - if api_details.get('request'): - st.caption("Richiesta API:") - st.json(api_details['request'], expanded=False) - if api_details.get('response_content'): - st.caption("Contenuto Risposta API:") - content = api_details['response_content'] - parsed = json.loads(content) if isinstance( - content, str - ) else content - st.code( - json.dumps(parsed, indent=2), - language="json", - ) - if api_details.get('error'): - st.caption("Errore API:") - st.error(api_details['error']) - - st.markdown("--- --- ---") From d63435c86c637c2322f85ee6c787524592a65b5c Mon Sep 17 00:00:00 2001 From: oniichan Date: Wed, 6 Aug 2025 12:14:04 +0200 Subject: [PATCH 05/41] import bug fixed --- controllers/openai_client.py | 94 ++++++ tests/test_api_preset_controller.py | 117 +++++++ tests/test_evaluate_answer.py | 88 ++++++ tests/test_openai_controllers.py | 93 ++++++ utils/__init__.py | 0 utils/cache.py | 65 ++++ views/__init__.py | 4 + views/api_configurazione.py | 283 +++++++++++++++++ views/component_utils.py | 179 +++++++++++ views/esecuzione_test.py | 169 ++++++++++ views/gestione_domande.py | 303 ++++++++++++++++++ views/gestione_set.py | 398 +++++++++++++++++++++++ views/home.py | 99 ++++++ views/session_state.py | 33 ++ 
views/set_helpers.py | 140 +++++++++ views/state_models.py | 50 +++ views/style_utils.py | 374 ++++++++++++++++++++++ views/ui_utils.py | 13 + views/visualizza_risultati.py | 469 ++++++++++++++++++++++++++++ 19 files changed, 2971 insertions(+) create mode 100644 controllers/openai_client.py create mode 100644 tests/test_api_preset_controller.py create mode 100644 tests/test_evaluate_answer.py create mode 100644 tests/test_openai_controllers.py create mode 100644 utils/__init__.py create mode 100644 utils/cache.py create mode 100644 views/__init__.py create mode 100644 views/api_configurazione.py create mode 100644 views/component_utils.py create mode 100644 views/esecuzione_test.py create mode 100644 views/gestione_domande.py create mode 100644 views/gestione_set.py create mode 100644 views/home.py create mode 100644 views/session_state.py create mode 100644 views/set_helpers.py create mode 100644 views/state_models.py create mode 100644 views/style_utils.py create mode 100644 views/ui_utils.py create mode 100644 views/visualizza_risultati.py diff --git a/controllers/openai_client.py b/controllers/openai_client.py new file mode 100644 index 0000000..270c60f --- /dev/null +++ b/controllers/openai_client.py @@ -0,0 +1,94 @@ +"""Utility per interagire con le API dei provider LLM.""" + +# mypy: ignore-errors + +import logging + +from openai import OpenAI +logger = logging.getLogger(__name__) + +DEFAULT_MODEL = "gpt-4o" +DEFAULT_ENDPOINT = "https://api.openai.com/v1" + +# Modelli disponibili per diversi provider (esempio) +OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] +ANTHROPIC_MODELS = [ + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", +] +# Aggiungi altri provider e modelli se necessario +# XAI_MODELS = ["grok-1"] + + +def get_openai_client(api_key: str, base_url: str | None = None): + """Crea e restituisce un client OpenAI configurato.""" + + if not api_key: + logger.warning("Tentativo di creare client OpenAI senza chiave API.") + return None + try: + effective_base_url = ( + base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT + ) + return OpenAI(api_key=api_key, base_url=effective_base_url) + except Exception as exc: + logger.error(f"Errore durante la creazione del client OpenAI: {exc}") + return None + + +def get_available_models_for_endpoint( + provider_name: str, endpoint_url: str | None = None, api_key: str | None = None +): + """Restituisce una lista di modelli disponibili basata sul provider o sull'endpoint.""" + + if provider_name == "OpenAI": + return OPENAI_MODELS + if provider_name == "Anthropic": + return ANTHROPIC_MODELS + # Aggiungi altri provider predefiniti qui + # elif provider_name == "XAI": + # return XAI_MODELS + if provider_name == "Personalizzato": + if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): + return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] + + client = get_openai_client(api_key=api_key, base_url=endpoint_url) + if not client: + return ["(Errore creazione client API)", DEFAULT_MODEL] + try: + models = client.models.list() + filtered_models = sorted( + [ + model.id + for model in models + if not any(term in model.id.lower() for term in ["embed", "embedding"]) + and ( + any( + term in model.id.lower() + for term in ["chat", "instruct", "gpt", "claude", "grok"] + ) + or len(model.id.split("-")) > 2 + ) + ] + ) + if not filtered_models: + filtered_models = sorted( + [ + model.id + 
for model in models + if not any(term in model.id.lower() for term in ["embed", "embedding"]) + ] + ) + return filtered_models if filtered_models else [DEFAULT_MODEL] + except Exception: + return ["(Errore recupero modelli)", DEFAULT_MODEL] + return [DEFAULT_MODEL] + + +__all__ = [ + "DEFAULT_MODEL", + "DEFAULT_ENDPOINT", + "get_openai_client", + "get_available_models_for_endpoint", +] diff --git a/tests/test_api_preset_controller.py b/tests/test_api_preset_controller.py new file mode 100644 index 0000000..a34da64 --- /dev/null +++ b/tests/test_api_preset_controller.py @@ -0,0 +1,117 @@ +import os +import sys +from unittest.mock import Mock, patch + +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import api_preset_controller as controller # noqa: E402 + + +@patch("controllers.api_preset_controller.load_presets") +def test_validate_preset_empty_name(mock_load): + ok, msg = controller.validate_preset({"name": ""}) + assert ok is False + assert "non può essere vuoto" in msg + mock_load.assert_not_called() + + +@patch("controllers.api_preset_controller.load_presets") +def test_validate_preset_duplicate(mock_load): + mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) + ok, msg = controller.validate_preset({"name": "A"}) + assert ok is False + assert "esiste già" in msg + + +@patch("controllers.api_preset_controller.load_presets") +def test_validate_preset_ok(mock_load): + mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) + ok, msg = controller.validate_preset({"name": "B"}) + assert ok is True + assert msg == "" + + +@patch("controllers.api_preset_controller.refresh_api_presets") +@patch("controllers.api_preset_controller.APIPreset.save_df") +@patch("controllers.api_preset_controller.load_presets") +@patch("controllers.api_preset_controller.uuid.uuid4", return_value="new-id") +def test_save_preset_new(mock_uuid, mock_load, mock_save_df, mock_refresh): + df = pd.DataFrame([ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ]) + mock_load.return_value = df + updated_df = pd.DataFrame([]) + mock_refresh.return_value = updated_df + + ok, msg, returned_df = controller.save_preset( + { + "name": "New", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.1, + "max_tokens": 50, + } + ) + + assert ok is True + assert "creato" in msg + assert returned_df is updated_df + mock_save_df.assert_called_once() + saved_df = mock_save_df.call_args[0][0] + assert "new-id" in saved_df["id"].values + assert "New" in saved_df["name"].values + + +@patch("controllers.api_preset_controller.refresh_api_presets") +@patch("controllers.api_preset_controller.APIPreset.delete") +@patch("controllers.api_preset_controller.load_presets") +def test_delete_preset(mock_load, mock_delete, mock_refresh): + df = pd.DataFrame([ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ]) + mock_load.return_value = df + updated_df = pd.DataFrame([]) + mock_refresh.return_value = updated_df + + ok, msg, returned_df = controller.delete_preset("1") + assert ok is True + assert "eliminato" in msg + assert returned_df is updated_df + mock_delete.assert_called_once_with("1") + + +@patch("controllers.api_preset_controller.openai_client.get_openai_client") +def test_test_api_connection_delegates(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = 
mock_client + mock_choice = Mock() + mock_choice.message = Mock() + mock_choice.message.content = "Connessione riuscita." + mock_resp = Mock() + mock_resp.choices = [mock_choice] + mock_client.chat.completions.create.return_value = mock_resp + + ok, msg = controller.test_api_connection("k", "e", "m", 0.1, 10) + + assert ok is True + assert "riuscita" in msg.lower() + mock_get_client.assert_called_once_with(api_key="k", base_url="e") diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py new file mode 100644 index 0000000..94615b9 --- /dev/null +++ b/tests/test_evaluate_answer.py @@ -0,0 +1,88 @@ +import json +import logging +import os +import sys +from unittest.mock import Mock, patch + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import test_controller # noqa: E402 + + +def _mock_response(content: str): + mock_resp = Mock() + mock_choice = Mock() + mock_choice.message = Mock() + mock_choice.message.content = content + mock_resp.choices = [mock_choice] + return mock_resp + + +def _mock_response_no_choices(): + mock_resp = Mock() + mock_resp.choices = [] + return mock_resp + + +@patch("controllers.test_controller.openai_client.get_openai_client") +def test_evaluate_answer_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + + evaluation = { + "score": 90, + "explanation": "good", + "similarity": 90, + "correctness": 90, + "completeness": 90, + } + mock_client.chat.completions.create.return_value = _mock_response( + json.dumps(evaluation) + ) + + result = test_controller.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"}, show_api_details=True + ) + + assert result["score"] == 90 + assert result["similarity"] == 90 + assert "api_details" in result + + +@patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) +def test_evaluate_answer_no_client(mock_get_client): + result = test_controller.evaluate_answer( + "q", "expected", "actual", {"api_key": None} + ) + + assert result["score"] == 0 + assert "Client API" in result["explanation"] + + +@patch("controllers.test_controller.openai_client.get_openai_client") +def test_evaluate_answer_json_decode_error(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response("not json") + + result = test_controller.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert result["score"] == 0 + assert "Errore di decodifica JSON" in result["explanation"] + + +@patch("controllers.test_controller.openai_client.get_openai_client") +def test_evaluate_answer_no_choices(mock_get_client, caplog): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response_no_choices() + + with caplog.at_level(logging.ERROR): + result = test_controller.evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) + + assert result["score"] == 0 + assert "choices" in caplog.text diff --git a/tests/test_openai_controllers.py b/tests/test_openai_controllers.py new file mode 100644 index 0000000..b916773 --- /dev/null +++ b/tests/test_openai_controllers.py @@ -0,0 +1,93 @@ +import os +import sys +from unittest.mock import Mock, patch + +# Aggiunge la cartella principale al percorso dei moduli per i test +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import api_preset_controller, test_controller # noqa: E402 + + +def 
_mock_response(content: str): + """Crea una risposta simulata con il contenuto fornito.""" + mock_resp = Mock() + mock_choice = Mock() + mock_choice.message = Mock() + mock_choice.message.content = content + mock_resp.choices = [mock_choice] + return mock_resp + + +@patch("controllers.test_controller.openai_client.get_openai_client") +def test_generate_example_answer_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response(" answer ") + + result = test_controller.generate_example_answer_with_llm( + "question", {"api_key": "key"} + ) + + assert result["answer"] == "answer" + + +@patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) +def test_generate_example_answer_no_client(mock_get_client): + result = test_controller.generate_example_answer_with_llm( + "question", {"api_key": None}, show_api_details=True + ) + + assert result["answer"] is None + assert result["api_details"]["error"] == "Client API non configurato" + + +@patch("controllers.test_controller.openai_client.get_openai_client") +def test_generate_example_answer_empty_question(mock_get_client): + mock_get_client.return_value = Mock() + + result = test_controller.generate_example_answer_with_llm( + "", {"api_key": "key"}, show_api_details=True + ) + + assert result["answer"] is None + assert result["api_details"]["error"] == "Domanda vuota o non valida" + + +@patch("controllers.api_preset_controller.openai_client.get_openai_client") +def test_test_api_connection_success(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response( + "Connessione riuscita." + ) + + ok, msg = api_preset_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is True + assert msg == "Connessione API riuscita!" 
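# Nota: le risposte fittizie create da _mock_response ricalcano la forma delle
# Chat Completions del client OpenAI usata dai controller, dove il testo si legge
# da resp.choices[0].message.content. Schizzo puramente indicativo, con endpoint,
# modello e prompt ipotetici:
#
#     from openai import OpenAI
#
#     client = OpenAI(api_key="sk-...", base_url="https://api.openai.com/v1")
#     resp = client.chat.completions.create(
#         model="gpt-4o",
#         messages=[{"role": "user", "content": "Rispondi: Connessione riuscita."}],
#         temperature=0.1,
#         max_tokens=10,
#     )
#     print(resp.choices[0].message.content)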
+ + +@patch("controllers.api_preset_controller.openai_client.get_openai_client") +def test_test_api_connection_unexpected_response(mock_get_client): + mock_client = Mock() + mock_get_client.return_value = mock_client + mock_client.chat.completions.create.return_value = _mock_response("failure") + + ok, msg = api_preset_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is False + assert "Risposta inattesa" in msg + + +@patch("controllers.api_preset_controller.openai_client.get_openai_client", return_value=None) +def test_test_api_connection_no_client(mock_get_client): + ok, msg = api_preset_controller.test_api_connection( + "key", "endpoint", "model", 0.1, 10 + ) + + assert ok is False + assert "Client API non inizializzato" in msg diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/cache.py b/utils/cache.py new file mode 100644 index 0000000..0b9a97d --- /dev/null +++ b/utils/cache.py @@ -0,0 +1,65 @@ +from functools import lru_cache +from dataclasses import asdict +import pandas as pd + +from models.question import Question +from models.question_set import QuestionSet +from models.api_preset import APIPreset +from models.test_result import TestResult + + +@lru_cache(maxsize=1) +def get_questions() -> pd.DataFrame: + data = [asdict(q) for q in Question.load_all()] + columns = ["id", "domanda", "risposta_attesa", "categoria"] + return pd.DataFrame(data, columns=columns) + + +def refresh_questions() -> pd.DataFrame: + get_questions.cache_clear() + return get_questions() + + +@lru_cache(maxsize=1) +def get_question_sets() -> pd.DataFrame: + data = [asdict(s) for s in QuestionSet.load_all()] + columns = ["id", "name", "questions"] + return pd.DataFrame(data, columns=columns) + + +def refresh_question_sets() -> pd.DataFrame: + get_question_sets.cache_clear() + return get_question_sets() + + +@lru_cache(maxsize=1) +def get_api_presets() -> pd.DataFrame: + data = [asdict(p) for p in APIPreset.load_all()] + columns = [ + "id", + "name", + "provider_name", + "endpoint", + "api_key", + "model", + "temperature", + "max_tokens", + ] + return pd.DataFrame(data, columns=columns) + + +def refresh_api_presets() -> pd.DataFrame: + get_api_presets.cache_clear() + return get_api_presets() + + +@lru_cache(maxsize=1) +def get_results() -> pd.DataFrame: + data = [asdict(r) for r in TestResult.load_all()] + columns = ["id", "set_id", "timestamp", "results"] + return pd.DataFrame(data, columns=columns) + + +def refresh_results() -> pd.DataFrame: + get_results.cache_clear() + return get_results() diff --git a/views/__init__.py b/views/__init__.py new file mode 100644 index 0000000..06bdc3a --- /dev/null +++ b/views/__init__.py @@ -0,0 +1,4 @@ +"""Views package.""" + +import logging +logger = logging.getLogger(__name__) diff --git a/views/api_configurazione.py b/views/api_configurazione.py new file mode 100644 index 0000000..c8d27a1 --- /dev/null +++ b/views/api_configurazione.py @@ -0,0 +1,283 @@ +import logging +import streamlit as st + +from views.style_utils import add_page_header, add_section_title +from controllers import ( + save_preset, + delete_preset, + load_presets, + list_presets, + get_preset_by_id, + validate_preset, + test_api_connection, +) +from controllers.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + +logger = logging.getLogger(__name__) + + +# Funzioni di callback per i pulsanti del form +def start_new_preset_edit(): + st.session_state.editing_preset = True + 
st.session_state.current_preset_edit_id = None # Indica nuovo preset + st.session_state.preset_form_data = { + "name": "", + "endpoint": DEFAULT_ENDPOINT, + "api_key": "", + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000 + } + + +def start_existing_preset_edit(preset_id): + preset_to_edit = get_preset_by_id(preset_id, st.session_state.api_presets) + if not preset_to_edit: + st.error("Preset non trovato.") + return + st.session_state.editing_preset = True + st.session_state.current_preset_edit_id = preset_id + st.session_state.preset_form_data = preset_to_edit.copy() + # Assicura che i campi numerici siano del tipo corretto per gli slider/number_input + st.session_state.preset_form_data["temperature"] = float( + st.session_state.preset_form_data.get("temperature", 0.0) + ) + st.session_state.preset_form_data["max_tokens"] = int( + st.session_state.preset_form_data.get("max_tokens", 1000) + ) + if "endpoint" not in st.session_state.preset_form_data: + st.session_state.preset_form_data["endpoint"] = DEFAULT_ENDPOINT + + +def cancel_preset_edit(): + st.session_state.editing_preset = False + st.session_state.current_preset_edit_id = None + st.session_state.preset_form_data = {} + + +def save_preset_from_form(): + """Salva un preset leggendo i valori direttamente dagli input della form.""" + # Recupera sempre i valori correnti dei widget dal session_state + preset_name = st.session_state.get("preset_name", "").strip() + endpoint = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + api_key = st.session_state.get("preset_api_key", "") + model = st.session_state.get("preset_model", DEFAULT_MODEL) + temperature = float( + st.session_state.get( + "preset_temperature", + st.session_state.preset_form_data.get("temperature", 0.0), + ) + ) + max_tokens = int( + st.session_state.get( + "preset_max_tokens", + st.session_state.preset_form_data.get("max_tokens", 1000), + ) + ) + + # Aggiorna il dizionario del form in sessione con i valori raccolti + st.session_state.preset_form_data.update( + { + "name": preset_name, + "endpoint": endpoint, + "api_key": api_key, + "model": model, + "temperature": temperature, + "max_tokens": max_tokens, + } + ) + + form_data = st.session_state.preset_form_data.copy() + current_id = st.session_state.current_preset_edit_id + + is_valid, validation_message = validate_preset(form_data, current_id) + if not is_valid: + st.error(validation_message) + return + + success, message, updated_df = save_preset(form_data, current_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + cancel_preset_edit() # Chiudi il form + else: + st.error(message) + + +def delete_preset_callback(preset_id): + success, message, updated_df = delete_preset(preset_id) + if success: + st.session_state.api_presets = updated_df + st.success(message) + if st.session_state.current_preset_edit_id == preset_id: + cancel_preset_edit() # Se stavamo modificando il preset eliminato, chiudi il form + else: + st.error(message) + + +def render(): + add_page_header( + "Gestione Preset API", + icon="⚙️", + description="Crea, visualizza, testa ed elimina i preset di configurazione API per LLM." 
+ ) + + # Stato della sessione per la gestione del form di creazione/modifica preset + if "editing_preset" not in st.session_state: + st.session_state.editing_preset = False + if "current_preset_edit_id" not in st.session_state: + st.session_state.current_preset_edit_id = None # None per nuovo, ID per modifica + if "preset_form_data" not in st.session_state: + st.session_state.preset_form_data = {} + + # Carica i preset API utilizzando la cache + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + + # Sezione per visualizzare/modificare i preset + if st.session_state.editing_preset: + add_section_title("Modifica/Crea Preset API", icon="✏️") + form_data = st.session_state.preset_form_data + + with st.form(key="preset_form"): + # Usa un key specifico per il campo nome e aggiorna il form_data + form_data["name"] = st.text_input( + "Nome del Preset", + value=form_data.get("name", ""), + key="preset_name", # Key esplicita per il campo nome + help="Un nome univoco per questo preset." + ) + + # Campo chiave API con key esplicita + form_data["api_key"] = st.text_input( + "Chiave API", + value=form_data.get("api_key", ""), + type="password", + key="preset_api_key", # Key esplicita per la chiave API + help="La tua chiave API per il provider selezionato." + ) + + # Campo endpoint con key esplicita + form_data["endpoint"] = st.text_input( + "Provider Endpoint", + value=form_data.get("endpoint", DEFAULT_ENDPOINT), + placeholder="https://api.openai.com/v1", + key="preset_endpoint", # Key esplicita per l'endpoint + help="Inserisci l'endpoint del provider API (es: https://api.openai.com/v1)" + ) + + # Modello sempre personalizzabile + form_data["model"] = st.text_input( + "Modello", + value=form_data.get("model", DEFAULT_MODEL), + placeholder="gpt-4o", + key="preset_model", # Key esplicita per il modello + help="Inserisci il nome del modello (es: gpt-4o, claude-3-sonnet, ecc.)" + ) + + form_data["temperature"] = st.slider( + "Temperatura", + 0.0, + 2.0, + float(form_data.get("temperature", 0.0)), + 0.1, + key="preset_temperature", + ) + form_data["max_tokens"] = st.number_input( + "Max Tokens", + min_value=50, + max_value=8000, + value=int(form_data.get("max_tokens", 1000)), + step=50, + key="preset_max_tokens", + ) + + # Campo Test Connessione e pulsanti di salvataggio/annullamento + # Pulsante Test Connessione + if st.form_submit_button("⚡ Testa Connessione API"): + # Usa direttamente i valori dal session_state per il test + api_key_to_test = st.session_state.get("preset_api_key", "") + endpoint_to_test = st.session_state.get("preset_endpoint", DEFAULT_ENDPOINT) + model_to_test = st.session_state.get("preset_model", DEFAULT_MODEL) + + with st.spinner("Test in corso..."): + success, message = test_api_connection( + api_key=api_key_to_test, + endpoint=endpoint_to_test, + model=model_to_test, + temperature=form_data.get("temperature", 0.0), + max_tokens=form_data.get("max_tokens", 1000) + ) + if success: + st.success(message) + else: + st.error(message) + + # Pulsanti Salva e Annulla + cols_form_buttons = st.columns(2) + with cols_form_buttons[0]: + if st.form_submit_button( + "💾 Salva Preset", + on_click=save_preset_from_form, + type="primary", + use_container_width=True, + ): + pass # Il callback gestisce il salvataggio + with cols_form_buttons[1]: + if st.form_submit_button( + "❌ Annulla", + on_click=cancel_preset_edit, + use_container_width=True, + ): + pass # Il callback gestisce il cambio di stato + else: + add_section_title("Preset API Salvati", icon="🗂️") + 
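        # Elenco dei preset salvati: per ogni preset vengono mostrati nome, modello ed
        # endpoint, con i pulsanti "Modifica" ed "Elimina" collegati ai rispettivi callback.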
if st.button("➕ Crea Nuovo Preset", on_click=start_new_preset_edit, use_container_width=True): + pass # Il callback gestisce il cambio di stato + + preset_list = list_presets(st.session_state.api_presets) + if not preset_list: + st.info( + "Nessun preset API salvato. Clicca su 'Crea Nuovo Preset' per iniziare." + ) + else: + for preset in preset_list: + with st.container(): + st.markdown(f"#### {preset['name']}") + cols_preset_details = st.columns([3, 1, 1]) + with cols_preset_details[0]: + st.caption(f"Modello: {preset.get('model', 'N/A')}") + st.caption(f"Endpoint: {preset.get('endpoint', 'N/A')}") + with cols_preset_details[1]: + if st.button( + "✏️ Modifica", + key=f"edit_{preset['id']}", + on_click=start_existing_preset_edit, + args=(preset['id'],), + use_container_width=True, + ): + pass + with cols_preset_details[2]: + if st.button( + "🗑️ Elimina", + key=f"delete_{preset['id']}", + on_click=delete_preset_callback, + args=(preset['id'],), + type="secondary", + use_container_width=True, + ): + pass + st.divider() + + # Mostra messaggi di conferma dopo il ricaricamento della pagina (se impostati dai callback) + if "preset_applied_message" in st.session_state: # Questo non dovrebbe più essere usato qui + st.success(st.session_state.preset_applied_message) + del st.session_state.preset_applied_message + + if "preset_saved_message" in st.session_state: + st.success(st.session_state.preset_saved_message) + del st.session_state.preset_saved_message + + if "preset_deleted_message" in st.session_state: + st.success(st.session_state.preset_deleted_message) + del st.session_state.preset_deleted_message diff --git a/views/component_utils.py b/views/component_utils.py new file mode 100644 index 0000000..ba79784 --- /dev/null +++ b/views/component_utils.py @@ -0,0 +1,179 @@ +import logging + +import streamlit as st +logger = logging.getLogger(__name__) + + +def create_card(title: str, content: str, icon: str | None = None, + is_success: bool = False, is_warning: bool = False, is_error: bool = False): + """Crea una scheda stilizzata con un contenuto personalizzabile.""" + color = "#4F6AF0" + bg_color = "white" + shadow_color = "rgba(79, 106, 240, 0.15)" + + if is_success: + color = "#28a745" + bg_color = "#f8fff9" + shadow_color = "rgba(40, 167, 69, 0.15)" + elif is_warning: + color = "#ffc107" + bg_color = "#fffef8" + shadow_color = "rgba(255, 193, 7, 0.15)" + elif is_error: + color = "#dc3545" + bg_color = "#fff8f8" + shadow_color = "rgba(220, 53, 69, 0.15)" + + icon_text = f'{icon}' if icon else "" + + st.markdown( + f""" + + +
        <div style="border-left: 5px solid {color}; background-color: {bg_color};
                    box-shadow: 0 2px 6px {shadow_color}; border-radius: 6px;
                    padding: 1rem; margin-bottom: 1rem;">
            <div style="font-weight: 600; color: {color}; margin-bottom: 0.5rem;">{icon_text}{title}</div>
            <div>{content}</div>
        </div>
+ """, + unsafe_allow_html=True, + ) + + +def create_metrics_container(metrics_data: list[dict]): + """Crea un contenitore con metriche ben stilizzate.""" + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + metrics_html = '
' + for metric in metrics_data: + icon_html = ( + f'
{metric.get("icon", "")}
' + if metric.get("icon") + else "" + ) + unit = metric.get("unit", "") + unit_html = f'{unit}' if unit else "" + help_text = f'title="{metric.get("help")}"' if metric.get("help") else "" + + metrics_html += f""" +
+ {icon_html} +
{metric['value']}{unit_html}
+
{metric['label']}
+
+ """ + + metrics_html += '
' + st.markdown(metrics_html, unsafe_allow_html=True) diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py new file mode 100644 index 0000000..a45cb9c --- /dev/null +++ b/views/esecuzione_test.py @@ -0,0 +1,169 @@ +import logging + +import streamlit as st + +from controllers import execute_llm_test, load_sets, load_presets +from views.style_utils import add_page_header, add_section_title +logger = logging.getLogger(__name__) + + +# === FUNZIONI DI CALLBACK === + +def set_llm_mode_callback(): + """Funzione di callback: imposta la modalità LLM""" + if st.session_state.test_mode != "Valutazione Automatica con LLM": + st.session_state.test_mode = "Valutazione Automatica con LLM" + st.session_state.mode_changed = True + + +def run_llm_test_callback(): + """Funzione di callback: esegue il test LLM""" + st.session_state.run_llm_test = True + + +def render(): + # === Inizializzazione delle variabili di stato === + if 'test_mode' not in st.session_state: + st.session_state.test_mode = "Valutazione Automatica con LLM" + if 'mode_changed' not in st.session_state: + st.session_state.mode_changed = False + if 'run_llm_test' not in st.session_state: + st.session_state.run_llm_test = False + + # Gestisce il cambio di modalità + if st.session_state.mode_changed: + st.session_state.mode_changed = False + st.rerun() + + add_page_header( + "Esecuzione Test", + icon="🧪", + description="Esegui valutazioni automatiche sui tuoi set di domande utilizzando i preset API configurati." + ) + + # Carica i dati necessari, utilizzando cache e session state + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + if st.session_state.api_presets.empty: + st.error( + "Nessun preset API configurato. Vai alla pagina 'Gestione Preset API' " + "per crearne almeno uno prima di eseguire i test." + ) + st.stop() + + # Controlla se ci sono set di domande disponibili + if st.session_state.question_sets.empty: + st.warning("Nessun set di domande disponibile. Crea dei set di domande prima di eseguire i test.") + st.stop() + + # Seleziona set di domande per il test + add_section_title("Seleziona Set di Domande", icon="📚") + set_options = {} + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + for _, row in st.session_state.question_sets.iterrows(): + if 'questions' in row and row['questions']: + set_options[row['id']] = f"{row['name']} ({len(row['questions'])} domande)" + + if not set_options: + st.warning("Nessun set di domande con domande associate. 
Creane uno in 'Gestione Set di Domande'.") + st.stop() + + selected_set_id = st.selectbox( + "Seleziona un set di domande", + options=list(set_options.keys()), + format_func=lambda x: set_options[x], + key="select_question_set_for_test" + ) + + selected_set = st.session_state.question_sets[st.session_state.question_sets['id'] == selected_set_id].iloc[0] + questions_in_set = selected_set['questions'] + + # --- Opzioni API basate su Preset --- + add_section_title("Opzioni API basate su Preset", icon="🛠️") + + preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} + preset_display_names = list(preset_names_to_id.keys()) + + def get_preset_config_by_name(name): + preset_id = preset_names_to_id.get(name) + if preset_id: + return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() + return None + + # Seleziona preset per generazione risposta (comune a entrambe le modalità) + generation_preset_name = st.selectbox( + "Seleziona Preset per Generazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="generation_preset_select", + help="Il preset API utilizzato per generare la risposta alla domanda." + ) + st.session_state.selected_generation_preset_name = generation_preset_name + + # Seleziona preset per valutazione (solo per modalità LLM) + if st.session_state.test_mode == "Valutazione Automatica con LLM": + evaluation_preset_name = st.selectbox( + "Seleziona Preset per Valutazione Risposta LLM", + options=preset_display_names, + index=0 if preset_display_names else None, # Seleziona il primo di default + key="evaluation_preset_select", + help="Il preset API utilizzato dall'LLM per valutare la similarità e correttezza della risposta generata." + ) + st.session_state.selected_evaluation_preset_name = evaluation_preset_name + + show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) + + # --- Logica di Esecuzione Test --- + test_mode_selected = st.session_state.test_mode + + if test_mode_selected == "Valutazione Automatica con LLM": + st.header("Esecuzione: Valutazione Automatica con LLM") + + # Pulsante che utilizza la funzione di callback + st.button( + "🚀 Esegui Test con LLM", + key="run_llm_test_btn", + on_click=run_llm_test_callback + ) + + # Gestisce l'esecuzione del test + if st.session_state.run_llm_test: + st.session_state.run_llm_test = False # Resetta lo stato + + gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) + eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) + + if not gen_preset_config or not eval_preset_config: + st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") + else: + with st.spinner("Generazione risposte e valutazione LLM in corso..."): + exec_result = execute_llm_test( + selected_set_id, + selected_set['name'], + questions_in_set, + gen_preset_config, + eval_preset_config, + show_api_details=show_api_details, + ) + + if exec_result: + st.session_state.results = exec_result['results_df'] + st.success(f"Test LLM completato! Punteggio medio: {exec_result['avg_score']:.2f}%") + + # Visualizzazione risultati dettagliati + st.subheader("Risultati Dettagliati") + for q_id, result in exec_result['results'].items(): + with st.expander( + f"Domanda: {result['question'][:50]}..." 
+ ): + col1, col2 = st.columns(2) + with col1: + st.write("**Domanda:**", result['question']) + st.write("**Risposta Attesa:**", result['expected_answer']) + with col2: + st.write("**Risposta Generata:**", result['actual_answer']) + st.write("**Punteggio:**", f"{result['evaluation']['score']:.1f}%") + st.write("**Valutazione:**", result['evaluation']['explanation']) diff --git a/views/gestione_domande.py b/views/gestione_domande.py new file mode 100644 index 0000000..e25023a --- /dev/null +++ b/views/gestione_domande.py @@ -0,0 +1,303 @@ +import logging + +import streamlit as st +import pandas as pd + +from controllers import ( + add_question, + update_question, + delete_question, + filter_questions_by_category, + load_questions, + refresh_questions, + import_questions_from_file, +) +from views.style_utils import add_page_header +from views.state_models import QuestionPageState +logger = logging.getLogger(__name__) + + +# === FUNZIONI DI CALLBACK === + + +def save_question_action( + question_id, edited_question, edited_answer, edited_category +) -> QuestionPageState: + """Salva le modifiche alla domanda e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + if update_question( + question_id, + domanda=edited_question, + risposta_attesa=edited_answer, + categoria=edited_category, + ): + state.save_success = True + st.session_state.questions = refresh_questions() + state.trigger_rerun = True + else: + state.save_error = True + return state + + +def create_save_question_callback( + question_id, edited_question, edited_answer, edited_category +): + def callback(): + st.session_state.question_page_state = save_question_action( + question_id, edited_question, edited_answer, edited_category + ) + + return callback + + +def delete_question_action(question_id) -> QuestionPageState: + """Elimina la domanda e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + delete_question(question_id) + state.delete_success = True + st.session_state.questions = refresh_questions() + state.trigger_rerun = True + return state + + +def import_questions_action(uploaded_file) -> QuestionPageState: + """Importa le domande da file e restituisce lo stato dell'operazione.""" + state = QuestionPageState() + if uploaded_file is not None: + success, message = import_questions_from_file(uploaded_file) + if success: + state.import_success = True + state.import_success_message = message + st.session_state.questions = refresh_questions() + state.trigger_rerun = True + else: + state.import_error = True + state.import_error_message = message + return state + + +def import_questions_callback(): + uploaded_file = st.session_state.get("uploaded_file_content") + st.session_state.question_page_state = import_questions_action(uploaded_file) + st.session_state.upload_questions_file = None + st.session_state.uploaded_file_content = None + + +# === FUNZIONI DI DIALOGO === + +@st.dialog("Conferma Eliminazione") +def confirm_delete_question_dialog(question_id, question_text): + """Dialogo di conferma per l'eliminazione della domanda""" + st.write("Sei sicuro di voler eliminare questa domanda?") + st.write(f"**Domanda:** {question_text[:100]}...") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + st.session_state.question_page_state = delete_question_action(question_id) + st.rerun() + + with col2: + if st.button("No, Annulla", use_container_width=True): + st.rerun() + + +def 
render(): + # === Inizializzazione dello stato === + st.session_state.setdefault("question_page_state", QuestionPageState()) + state: QuestionPageState = st.session_state.question_page_state + + # Carica le domande utilizzando la cache + st.session_state.questions = load_questions() + + # Gestisce la logica di rerun + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + # Mostra i messaggi di stato + if state.save_success: + st.success(state.save_success_message) + if state.save_error: + st.error(state.save_error_message) + if state.delete_success: + st.success(state.delete_success_message) + if state.add_success: + st.success(state.add_success_message) + if state.import_success: + st.success(state.import_success_message) + if state.import_error: + st.error(state.import_error_message) + + # Resetta lo stato dopo la visualizzazione dei messaggi + st.session_state.question_page_state = QuestionPageState() + + # Aggiungi un'intestazione stilizzata + add_page_header( + "Gestione Domande", + icon="📋", + description="Crea, modifica e gestisci le tue domande, le risposte attese e le categorie." + ) + + # Scheda per diverse funzioni di gestione delle domande + tabs = st.tabs(["Visualizza & Modifica Domande", "Aggiungi Domande", "Importa da File"]) + + # Scheda Visualizza e Modifica Domande + with tabs[0]: + st.header("Visualizza e Modifica Domande") + + if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df, unique_categories = filter_questions_by_category() + category_options = ["Tutte le categorie"] + unique_categories + + selected_category = st.selectbox( + "Filtra per categoria:", + options=category_options, + index=0 + ) + + if selected_category == "Tutte le categorie": + filtered_questions_df = questions_df + else: + filtered_questions_df, _ = filter_questions_by_category(selected_category) + + if not filtered_questions_df.empty: + for idx, row in filtered_questions_df.iterrows(): + category_display = row.get('categoria', 'N/A') if pd.notna(row.get('categoria')) else 'N/A' + with st.expander( + f"Domanda: {row['domanda'][:100]}... (Categoria: {category_display})" + ): + col1, col2 = st.columns([3, 1]) + + with col1: + edited_question = st.text_area( + f"Modifica Domanda {idx + 1}", + value=row['domanda'], + key=f"q_edit_{row['id']}" + ) + + edited_answer = st.text_area( + f"Modifica Risposta Attesa {idx + 1}", + value=row['risposta_attesa'], + key=f"a_edit_{row['id']}" + ) + + edited_category_value = row.get('categoria', '') + edited_category = st.text_input( + f"Modifica Categoria {idx + 1}", + value=edited_category_value, + key=f"c_edit_{row['id']}" + ) + + with col2: + st.button( + "Salva Modifiche", + key=f"save_{row['id']}", + on_click=create_save_question_callback( + row['id'], edited_question, edited_answer, edited_category + ), + ) + + if st.button( + "Elimina Domanda", + key=f"delete_{row['id']}", + type="secondary" + ): + confirm_delete_question_dialog(row['id'], row['domanda']) + else: + st.info(f"Nessuna domanda trovata per la categoria '{selected_category}'.") + + else: + st.info("Nessuna domanda disponibile. 
Aggiungi domande utilizzando la scheda 'Aggiungi Domande'.") + + # Scheda Aggiungi Domande + with tabs[1]: + st.header("Aggiungi Nuova Domanda") + + with st.form("add_question_form"): + domanda = st.text_area("Domanda", placeholder="Inserisci qui la domanda...") + risposta_attesa = st.text_area("Risposta Attesa", placeholder="Inserisci qui la risposta attesa...") + categoria = st.text_input("Categoria (opzionale)", placeholder="Inserisci qui la categoria...") + + submitted = st.form_submit_button("Aggiungi Domanda") + + if submitted: + if domanda and risposta_attesa: + # Passa la categoria, che può essere una stringa vuota se non inserita + question_id = add_question( + domanda=domanda, + risposta_attesa=risposta_attesa, + categoria=categoria, + ) + state = QuestionPageState() + state.add_success = True + state.add_success_message = ( + f"Domanda aggiunta con successo con ID: {question_id}" + ) + state.trigger_rerun = True + st.session_state.question_page_state = state + st.session_state.questions = refresh_questions() + st.rerun() + else: + st.error("Sono necessarie sia la domanda che la risposta attesa.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Domande da File") + + st.write(""" + Carica un file CSV o JSON contenente domande, risposte attese e categorie (opzionale). + + ### Formato File: + - **CSV**: Deve includere le colonne 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + - **JSON**: Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. + Può includere opzionalmente 'categoria'. + (Se usi i vecchi nomi 'question' e 'expected_answer', verranno convertiti automaticamente). + + ### Esempio CSV: + ```csv + domanda,risposta_attesa,categoria + "Quanto fa 2+2?","4","Matematica Base" + "Qual è la capitale della Francia?","Parigi","Geografia" + "Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" + ``` + + ### Esempio JSON: + ```json + [ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } + ] + ``` + """) + + uploaded_file = st.file_uploader( + "Scegli un file", type=["csv", "json"], key="upload_questions_file" + ) + + if uploaded_file is not None: + # Salva il file in session_state per l'uso da parte della callback + st.session_state.uploaded_file_content = uploaded_file + + # Pulsante che utilizza la funzione di callback + st.button( + "Importa Domande", + key="import_questions_btn", + on_click=import_questions_callback + ) diff --git a/views/gestione_set.py b/views/gestione_set.py new file mode 100644 index 0000000..544640b --- /dev/null +++ b/views/gestione_set.py @@ -0,0 +1,398 @@ +import logging +import streamlit as st +from controllers import create_set, load_sets, load_questions +from views.style_utils import add_page_header, add_global_styles +from views.state_models import SetPageState +from views.set_helpers import ( + confirm_delete_set_dialog, + import_set_callback, + get_question_text, + get_question_category, + mark_expander_open, + create_save_set_callback, +) + +logger = logging.getLogger(__name__) + + +def render(): + add_global_styles() + + st.session_state.setdefault("set_page_state", SetPageState()) + state: SetPageState = 
st.session_state.set_page_state + + st.session_state.setdefault("question_checkboxes", {}) + st.session_state.setdefault("newly_selected_questions", {}) + st.session_state.setdefault("set_expanders", {}) + + if state.trigger_rerun: + state.trigger_rerun = False + st.rerun() + + if state.save_set_success: + st.success(state.save_set_success_message) + state.save_set_success = False + + if state.save_set_error: + st.error(state.save_set_error_message) + state.save_set_error = False + + if state.delete_set_success: + st.success(state.delete_set_success_message) + state.delete_set_success = False + + if state.create_set_success: + st.success(state.create_set_success_message) + state.create_set_success = False + + if state.import_set_success: + st.success(state.import_set_success_message) + state.import_set_success = False + + if state.import_set_error: + st.error(state.import_set_error_message) + state.import_set_error = False + + # Inizializza i dati utilizzando la cache + if 'questions' not in st.session_state: + st.session_state.questions = load_questions() + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + # Assicurati che esista lo stato degli expander per ogni set + if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: + current_set_ids = st.session_state.question_sets['id'].tolist() + # Rimuovi stati per set non più presenti + for sid in list(st.session_state.set_expanders.keys()): + if sid not in current_set_ids: + del st.session_state.set_expanders[sid] + # Aggiungi stato predefinito per nuovi set + for sid in current_set_ids: + st.session_state.set_expanders.setdefault(sid, False) + + # Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN + if 'questions' in st.session_state and not st.session_state.questions.empty: + questions_df_temp = st.session_state.questions + if 'categoria' not in questions_df_temp.columns: + questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante + questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN + st.session_state.questions = questions_df_temp + + # Aggiungi un'intestazione stilizzata + add_page_header( + "Gestione Set di Domande", + icon="📚", + description="Organizza le tue domande in set per test e valutazioni" + ) + + # Schede per diverse funzioni di gestione dei set + tabs = st.tabs(["Visualizza & Modifica Set", "Crea Nuovo Set", "Importa Set da file"]) + + # Scheda Visualizza e Modifica Set + with tabs[0]: + st.header("Visualizza e Modifica Set di Domande") + + questions_ready = ('questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns) + sets_ready = 'question_sets' in st.session_state + + if not questions_ready: + st.warning( + "Dati delle domande (incluse categorie) non completamente caricati. " + "Alcune funzionalità potrebbero essere limitate. Vai a 'Gestione Domande'." + ) + # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti + unique_categories_for_filter = [] + selected_categories = [] + else: + questions_df = st.session_state.questions + # Ottieni categorie uniche per il filtro, escludendo 'N/A' + # se si preferisce non mostrarlo come opzione selezionabile + # o gestendolo specificamente. Per ora, includiamo tutto. 
+ unique_categories_for_filter = sorted( + list(questions_df['categoria'].astype(str).unique()) + ) + if not unique_categories_for_filter: + st.info( + "Nessuna categoria definita nelle domande esistenti per poter filtrare." + ) + + selected_categories = st.multiselect( + "Filtra per categorie (mostra i set che contengono almeno una domanda da " + "OGNI categoria selezionata):", + options=unique_categories_for_filter, + default=[], + key="filter_categories", + ) + + if sets_ready and not st.session_state.question_sets.empty: + question_sets_df = st.session_state.question_sets + display_sets_df = question_sets_df.copy() # Inizia con tutti i set + + if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti + filtered_set_indices = [] + for idx, set_row in question_sets_df.iterrows(): + question_ids_in_set = set_row.get('questions', []) + if not isinstance(question_ids_in_set, list): + question_ids_in_set = [] + + if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro + continue + + categories_present_in_set = set() + for q_id in question_ids_in_set: + category = get_question_category(str(q_id), questions_df) + categories_present_in_set.add(category) + + # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata + if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): + filtered_set_indices.append(idx) + + display_sets_df = question_sets_df.loc[filtered_set_indices] + + if display_sets_df.empty and selected_categories: + st.info( + "Nessun set trovato che contenga domande da tutte le categorie selezionate: " + f"{', '.join(selected_categories)}." + ) + elif display_sets_df.empty and not selected_categories: + st.info( + "Nessun set di domande disponibile. Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'." 
+ ) + + for idx, row in display_sets_df.iterrows(): + exp_key = f"set_expander_{row['id']}" + if exp_key not in st.session_state.set_expanders: + st.session_state.set_expanders[exp_key] = False + + with st.expander( + f"Set: {row['name']}", + expanded=st.session_state.set_expanders.get(exp_key, False), + ): + col1, col2 = st.columns([3, 1]) + + with col1: + _ = st.text_input( + "Nome Set", + value=row['name'], + key=f"set_name_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + + st.subheader("Domande in questo Set") + current_question_ids_in_set = row.get('questions', []) + if not isinstance(current_question_ids_in_set, list): + current_question_ids_in_set = [] + + if row['id'] not in st.session_state.question_checkboxes: + st.session_state.question_checkboxes[row['id']] = {} + + if current_question_ids_in_set: + for q_id in current_question_ids_in_set: + q_text = get_question_text(str(q_id)) + q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' + display_text = f"{q_text} (Categoria: {q_cat})" + + # 使用回调来更新checkbox状态 + checkbox_value = st.checkbox( + display_text, + value=True, + key=f"qcheck_{row['id']}_{q_id}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value + else: + st.info("Nessuna domanda in questo set.") + + st.subheader("Aggiungi Domande al Set") + + # 初始化新选择的问题状态 + if row['id'] not in st.session_state.newly_selected_questions: + st.session_state.newly_selected_questions[row['id']] = [] + + if questions_ready: + all_questions_df = st.session_state.questions + available_questions_df = all_questions_df[ + ~all_questions_df['id'].astype(str).isin( + [str(q_id) for q_id in current_question_ids_in_set] + ) + ] + + if not available_questions_df.empty: + question_dict_for_multiselect = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" + for q_id, q_text in zip( + available_questions_df['id'].astype(str), + available_questions_df['domanda'], + ) + } + newly_selected_questions_ids = st.multiselect( + "Seleziona domande da aggiungere", + options=list(question_dict_for_multiselect.keys()), + format_func=lambda x: question_dict_for_multiselect.get(x, x), + key=f"add_q_{row['id']}", + on_change=mark_expander_open, + args=(exp_key,) + ) + st.session_state.newly_selected_questions[row['id']] = newly_selected_questions_ids + else: + st.info("Nessuna altra domanda disponibile da aggiungere.") + else: + st.info("Le domande non sono disponibili per la selezione (dati mancanti o incompleti).") + + with col2: + st.button( + "Salva Modifiche", + key=f"save_set_{row['id']}", + on_click=create_save_set_callback(row['id'], exp_key, state) + ) + + # Pulsante Elimina con dialog di conferma + if st.button( + "Elimina Set", + key=f"delete_set_{row['id']}", + type="secondary" + ): + mark_expander_open(exp_key) + confirm_delete_set_dialog(row['id'], row['name'], state) + + # Lo stato dell'expander viene aggiornato tramite i callback + + elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): + st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") + + # Scheda Crea Nuovo Set + with tabs[1]: + st.header("Crea Nuovo Set di Domande") + + with st.form("create_set_form"): + set_name = st.text_input("Nome Set", placeholder="Inserisci un nome per il set...") + + selected_qs_for_new_set = [] + questions_ready_for_creation = ( + 'questions' in st.session_state and + not st.session_state.questions.empty and + 'domanda' in st.session_state.questions.columns and + 'categoria' in st.session_state.questions.columns + ) + + if questions_ready_for_creation: + all_questions_df_creation = st.session_state.questions + question_dict_for_creation = { + q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" + for q_id, q_text in zip( + all_questions_df_creation['id'].astype(str), + all_questions_df_creation['domanda'], + ) + } + + selected_qs_for_new_set = st.multiselect( + "Seleziona domande per questo set", + options=list(question_dict_for_creation.keys()), + format_func=lambda x: question_dict_for_creation.get(x, x), + key="create_set_questions", + ) + else: + st.info( + "Nessuna domanda disponibile o dati delle domande non pronti (incl. categorie). \n" + "Vai a 'Gestione Domande' per aggiungere/caricare domande." + ) + + submitted = st.form_submit_button("Crea Set") + + if submitted: + if set_name: + set_id = create_set( + set_name, [str(q_id) for q_id in selected_qs_for_new_set] + ) + st.session_state.question_sets = load_sets() + state.create_set_success_message = ( + f"Set di domande creato con successo con ID: {set_id}" + ) + state.create_set_success = True + state.trigger_rerun = True + st.rerun() + else: + st.error("Il nome del set è obbligatorio.") + + # Scheda Importa da File + with tabs[2]: + st.header("Importa Set da File") + + st.write(""" + Carica un file JSON o CSV contenente uno o più set di domande. + + ### Formato File JSON per Set Multipli: + ```json + [ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } + ] + ``` + + ### Formato CSV: + Ogni riga deve contenere le colonne ``name`` (nome del set), ``id`` + (ID della domanda), ``domanda`` (testo), ``risposta_attesa`` e + ``categoria``. 
+ ```csv + name,id,domanda,risposta_attesa,categoria + Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia + Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia + Matematica Base,3,Quanto fa 2+2?,4,Matematica + Matematica Base,4,Quanto fa 10*4?,40,Matematica + ``` + + ### Note Importanti: + - Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente + - Se un set con lo stesso nome esiste già, verrà saltato + - Solo le domande nuove verranno aggiunte al database + - Le domande esistenti verranno referenziate nei nuovi set + """) + + uploaded_file = st.file_uploader( + "Scegli un file", type=["json", "csv"], key="upload_set_file" + ) + + if uploaded_file is not None: + st.session_state.uploaded_file_content_set = uploaded_file + st.button( + "Importa Set", + key="import_set_btn", + on_click=lambda: import_set_callback(state) + ) diff --git a/views/home.py b/views/home.py new file mode 100644 index 0000000..fd7d96e --- /dev/null +++ b/views/home.py @@ -0,0 +1,99 @@ +"""Home page view module for the Streamlit application.""" + +import logging + +import streamlit as st +from .style_utils import add_home_styles +logger = logging.getLogger(__name__) + + +def render(): + """Visualizza la pagina principale con le funzionalità della piattaforma.""" + + add_home_styles() + + st.markdown( + """ +
+

🧠 Piattaforma di Valutazione LLM

+

Una piattaforma completa per valutare le risposte LLM con diversi provider AI

+
+""", + unsafe_allow_html=True, + ) + + # Box delle funzionalità con icone e stile migliorato + col1, col2 = st.columns(2) + + with col1: + st.markdown( + """ +
+

+ 📋 + Gestione delle Domande +

+

+ Crea, modifica e organizza le tue domande di test con le risposte previste. + Costruisci set di test completi per valutare le risposte LLM in modo efficiente. +

+
+ +
+

+ 🔌 + Supporto Multi-Provider API +

+

+ Connettiti a OpenAI, Anthropic o X.AI con selezione personalizzata del modello. + Configura parametri API e verifica le connessioni con feedback in tempo reale. +

+
+ """, + unsafe_allow_html=True, + ) + + with col2: + st.markdown( + """ +
+

+ 🧪 + Valutazione Automatizzata +

+

+ Esegui test con punteggio automatico rispetto alle risposte previste. + Valuta la somiglianza semantica tra testi con modelli linguistici. +

+
+ +
+

+ 📊 + Analisi Avanzata +

+

+ Visualizza i risultati dei test con grafici interattivi e metriche dettagliate. + Analizza parole chiave mancanti e ottieni suggerimenti di miglioramento specifici. +

+
+ """, + unsafe_allow_html=True, + ) + + st.markdown( + """ +
+

🚀 Iniziare

+
    +
  1. Configura le tue credenziali API nella pagina Configurazione API
  2. Crea domande e risposte previste nella pagina Gestione Domande
  3. Organizza le domande in set nella pagina Gestione Set di Domande
  4. Esegui valutazioni nella pagina Esecuzione Test
  5. Visualizza e analizza i risultati nella pagina Visualizzazione Risultati
+

Utilizza la barra laterale a sinistra per navigare tra queste funzionalità.

+
+""", + unsafe_allow_html=True, + ) diff --git a/views/session_state.py b/views/session_state.py new file mode 100644 index 0000000..8d9d84d --- /dev/null +++ b/views/session_state.py @@ -0,0 +1,33 @@ +import logging + +import streamlit as st + +from controllers import get_initial_state +logger = logging.getLogger(__name__) + + +def ensure_keys(defaults: dict) -> None: + """Garantisce la presenza delle chiavi in ``st.session_state``. + + Parametri: + defaults: Dizionario con chiavi e valori da impostare se mancanti. + """ + for key, value in defaults.items(): + st.session_state.setdefault(key, value) + + +def initialize_session_state() -> None: + """Inizializza ``st.session_state`` con i valori di default.""" + required_keys = [ + "questions", + "question_sets", + "results", + "api_key", + "endpoint", + "model", + "temperature", + "max_tokens", + ] + if any(key not in st.session_state for key in required_keys): + defaults = get_initial_state() + ensure_keys(defaults) diff --git a/views/set_helpers.py b/views/set_helpers.py new file mode 100644 index 0000000..89b96b1 --- /dev/null +++ b/views/set_helpers.py @@ -0,0 +1,140 @@ +import logging + +import streamlit as st + +from controllers import ( + update_set, + delete_set, + refresh_question_sets, + import_sets_from_file, + refresh_questions, +) +from .state_models import SetPageState +logger = logging.getLogger(__name__) + + +def save_set_callback( + set_id: str, + edited_name: str, + question_options_checkboxes: dict, + newly_selected_questions_ids: list[str], + state: SetPageState, +) -> None: + kept_questions_ids = [q_id for q_id, keep in question_options_checkboxes.items() if keep] + updated_questions_ids = list( + set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids]) + ) + + update_set(set_id, edited_name, updated_questions_ids) + state.save_set_success_message = "Set di domande aggiornato con successo!" + state.save_set_success = True + st.session_state.question_sets = refresh_question_sets() + state.trigger_rerun = True + + +def delete_set_callback(set_id: str, state: SetPageState): + delete_set(set_id) + state.delete_set_success_message = "Set di domande eliminato con successo!" 
+ state.delete_set_success = True + st.session_state.question_sets = refresh_question_sets() + state.trigger_rerun = True + + +@st.dialog("Conferma Eliminazione") +def confirm_delete_set_dialog(set_id: str, set_name: str, state: SetPageState): + """Dialog di conferma per l'eliminazione del set di domande""" + st.write(f"Sei sicuro di voler eliminare il set '{set_name}'?") + st.warning("Questa azione non può essere annullata.") + + col1, col2 = st.columns(2) + + with col1: + if st.button("Sì, Elimina", type="primary", use_container_width=True): + delete_set_callback(set_id, state) + st.rerun() + + with col2: + if st.button("No, Annulla", use_container_width=True): + st.rerun() + + +def import_set_callback(state: SetPageState): + """Importa uno o più set di domande da file JSON o CSV.""" + + state.import_set_success = False + state.import_set_error = False + state.import_set_success_message = "" + state.import_set_error_message = "" + + uploaded_file = st.session_state.get("uploaded_file_content_set") + result = import_sets_from_file(uploaded_file) + + if result["success"]: + state.import_set_success = True + state.import_set_success_message = result["success_message"] + st.session_state.questions = refresh_questions() + st.session_state.question_sets = refresh_question_sets() + else: + state.import_set_error = True + state.import_set_error_message = result["error_message"] + + for warn in result.get("warnings", []): + st.warning(warn) + + st.session_state.uploaded_file_content_set = None + st.session_state.pop("upload_set_file", None) + state.trigger_rerun = True + + +def get_question_text(question_id: str) -> str: + """Ritorna il testo della domanda dato il suo ID.""" + if "questions" in st.session_state and not st.session_state.questions.empty: + if "domanda" not in st.session_state.questions.columns: + st.session_state.questions = refresh_questions() + if "domanda" not in st.session_state.questions.columns: + return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" + + question_row = st.session_state.questions[st.session_state.questions["id"] == str(question_id)] + if not question_row.empty: + return question_row.iloc[0]["domanda"] + return f"ID Domanda: {question_id} (non trovata o dati non caricati)" + + +def get_question_category(question_id: str, questions_df): + """Ritorna la categoria di una domanda dato il suo ID.""" + if questions_df is not None and not questions_df.empty and "categoria" in questions_df.columns: + question_row = questions_df[questions_df["id"] == str(question_id)] + if not question_row.empty: + return question_row.iloc[0]["categoria"] + return "N/A" + + +def mark_expander_open(exp_key: str): + """Segna l'expander come aperto nello stato di sessione""" + if "set_expanders" in st.session_state: + st.session_state.set_expanders[exp_key] = True + + +def create_save_set_callback(set_id: str, exp_key: str, state: SetPageState): + def callback(): + mark_expander_open(exp_key) + edited_name = st.session_state.get(f"set_name_{set_id}", "") + question_options_checkboxes = st.session_state.question_checkboxes.get(set_id, {}) + newly_selected_questions_ids = st.session_state.newly_selected_questions.get(set_id, []) + + save_set_callback( + set_id, + edited_name, + question_options_checkboxes, + newly_selected_questions_ids, + state, + ) + + return callback + + +def create_delete_set_callback(set_id: str, state: SetPageState): + def callback(): + delete_set_callback(set_id, state) + + return callback diff --git a/views/state_models.py b/views/state_models.py new file 
mode 100644 index 0000000..9c5d020 --- /dev/null +++ b/views/state_models.py @@ -0,0 +1,50 @@ +import logging + +from dataclasses import dataclass +logger = logging.getLogger(__name__) + + +@dataclass +class SetPageState: + """Transient UI state for the question set management page.""" + + save_set_success: bool = False + save_set_success_message: str = "Set aggiornato con successo!" + save_set_error: bool = False + save_set_error_message: str = "Errore durante l'aggiornamento del set." + + delete_set_success: bool = False + delete_set_success_message: str = "Set eliminato con successo!" + + create_set_success: bool = False + create_set_success_message: str = "Set creato con successo!" + + import_set_success: bool = False + import_set_success_message: str = "Importazione completata con successo!" + import_set_error: bool = False + import_set_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False + + +@dataclass +class QuestionPageState: + """Transient UI state for the question management page.""" + + save_success: bool = False + save_success_message: str = "Domanda aggiornata con successo!" + save_error: bool = False + save_error_message: str = "Impossibile aggiornare la domanda." + + delete_success: bool = False + delete_success_message: str = "Domanda eliminata con successo!" + + add_success: bool = False + add_success_message: str = "Domanda aggiunta con successo!" + + import_success: bool = False + import_success_message: str = "Importazione completata con successo!" + import_error: bool = False + import_error_message: str = "Errore durante l'importazione." + + trigger_rerun: bool = False diff --git a/views/style_utils.py b/views/style_utils.py new file mode 100644 index 0000000..bd7deeb --- /dev/null +++ b/views/style_utils.py @@ -0,0 +1,374 @@ +"""Funzioni di utilità per applicare stili CSS nelle viste Streamlit. + +Centralizza l'iniezione di CSS per favorirne il riuso tra le pagine. +""" + +import logging + +import streamlit as st +logger = logging.getLogger(__name__) + + +def add_global_styles(): + """Aggiunge stili globali all'applicazione.""" + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + +def add_page_header(title: str, icon: str = "💡", description: str | None = None): + """Aggiunge un'intestazione di pagina stilizzata.""" + add_global_styles() + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + st.markdown( + f""" + +
+ """, + unsafe_allow_html=True, + ) + + +def add_section_title(title: str, icon: str | None = None): + """Aggiunge un titolo di sezione stilizzato.""" + icon_text = f"{icon} " if icon else "" + st.markdown( + f"
{icon_text}{title}
", + unsafe_allow_html=True, + ) + + +def add_home_styles(): + """Applica gli stili CSS specifici della home page. + + Migliora la visibilità degli input nei temi chiaro e scuro e definisce + l'aspetto degli elementi principali come box funzionali e sezioni di + benvenuto. + """ + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) diff --git a/views/ui_utils.py b/views/ui_utils.py new file mode 100644 index 0000000..d1adb95 --- /dev/null +++ b/views/ui_utils.py @@ -0,0 +1,13 @@ +import logging + +from .style_utils import add_global_styles, add_page_header, add_section_title +from .component_utils import create_card, create_metrics_container +logger = logging.getLogger(__name__) + +__all__ = [ + "add_global_styles", + "add_page_header", + "add_section_title", + "create_card", + "create_metrics_container", +] diff --git a/views/visualizza_risultati.py b/views/visualizza_risultati.py new file mode 100644 index 0000000..d7f043a --- /dev/null +++ b/views/visualizza_risultati.py @@ -0,0 +1,469 @@ +import logging + +import streamlit as st +import pandas as pd +import json +import plotly.express as px +import plotly.graph_objects as go + +from controllers import ( + import_results_from_file, + load_results, + refresh_results, + calculate_statistics, + load_sets, + load_presets, +) +from views.style_utils import add_page_header, add_section_title +logger = logging.getLogger(__name__) + + +def render(): + add_page_header( + "Visualizzazione Risultati Test", + icon="📊", + description="Analizza e visualizza i risultati dettagliati delle valutazioni dei test eseguiti." + ) + + # Carica i risultati utilizzando la cache + if 'results' not in st.session_state: + st.session_state.results = load_results() + if st.session_state.results.empty: + st.warning("Nessun risultato di test disponibile. 
Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") + st.stop() + + # Carica i set di domande utilizzando la cache + if 'question_sets' not in st.session_state: + st.session_state.question_sets = load_sets() + + # Carica i preset API utilizzando la cache + if 'api_presets' not in st.session_state: + st.session_state.api_presets = load_presets() + + # Stato per messaggi di importazione risultati + if 'import_results_success' not in st.session_state: + st.session_state.import_results_success = False + if 'import_results_error' not in st.session_state: + st.session_state.import_results_error = False + if 'import_results_message' not in st.session_state: + st.session_state.import_results_message = "" + + if st.session_state.import_results_success: + st.success(st.session_state.import_results_message) + st.session_state.import_results_success = False + if st.session_state.import_results_error: + st.error(st.session_state.import_results_message) + st.session_state.import_results_error = False + + def get_set_name(set_id): + if not st.session_state.question_sets.empty: + set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] + if not set_info.empty: + return set_info.iloc[0]['name'] + return "Set Sconosciuto" + + def get_model_from_preset_name(preset_name): + """Restituisce il modello associato a un preset, se disponibile.""" + if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: + preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] + if not preset_row.empty: + return preset_row.iloc[0]['model'] + return "Sconosciuto" + + def import_results_callback(): + """Callback per importare risultati da file JSON.""" + if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: + success, message = import_results_from_file(st.session_state.uploaded_results_file) + st.session_state.import_results_message = message + st.session_state.import_results_success = success + st.session_state.import_results_error = not success + if success: + st.session_state.results = refresh_results() + st.session_state.uploaded_results_file = None + st.session_state.upload_results = None + + # Filtri per Set e Modello LLM + all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) + + all_model_names = sorted({ + r['results']['generation_llm'] + for _, r in st.session_state.results.iterrows() + if r['results'].get('generation_llm') + }) + + selected_set_filter = st.selectbox( + "Filtra per Set", + options=["Tutti"] + all_set_names, + index=0, + key="filter_set_name" + ) + + selected_model_filter = st.selectbox( + "Filtra per Modello LLM usato per la generazione della risposta", + options=["Tutti"] + all_model_names, + index=0, + key="filter_model_name" + ) + + filtered_results_df = st.session_state.results + if selected_set_filter != "Tutti": + set_ids = st.session_state.question_sets[ + st.session_state.question_sets['name'] == selected_set_filter + ]['id'].astype(str) + filtered_results_df = filtered_results_df[ + filtered_results_df['set_id'].astype(str).isin(set_ids) + ] + + if selected_model_filter != "Tutti": + filtered_results_df = filtered_results_df[ + filtered_results_df['results'].apply( + lambda res: res.get('generation_llm') == selected_model_filter + ) + ] + + # Elabora i risultati per la visualizzazione nel selectbox + processed_results_for_select = [] + for _, row in filtered_results_df.iterrows(): + 
result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli + set_name = get_set_name(row['set_id']) + avg_score = result_data.get('avg_score', 0) + method = result_data.get('method', 'N/A') + method_icon = "🤖" if method == "LLM" else "📊" + + processed_results_for_select.append( + { + 'id': row['id'], + 'display_name': ( + f"{row['timestamp']} - {method_icon} {set_name} " + f"(Avg: {avg_score:.2f}%) - {method}" + ), + } + ) + + processed_results_for_select.sort( + key=lambda x: x['display_name'].split(' - ')[0], + reverse=True, + ) # Ordina per timestamp + + result_options = {r['id']: r['display_name'] for r in processed_results_for_select} + + # Seleziona il risultato da visualizzare + selected_result_id = st.selectbox( + "Seleziona un Risultato del Test da Visualizzare", + options=list(result_options.keys()), + format_func=lambda x: result_options[x], + index=0 if result_options else None, + key="select_test_result_to_view" + ) + + # Opzionalmente seleziona un secondo risultato per il confronto + # Rimuove l'opzione del risultato attualmente selezionato per evitare di confrontare il test con se stesso + compare_options = [rid for rid in result_options.keys() if rid != selected_result_id] + compare_result_id = st.selectbox( + "Confronta con un altro risultato (opzionale)", + options=[None] + compare_options, + format_func=lambda x: "Nessun confronto" if x is None else result_options[x], + index=0, + key="select_test_result_compare" + ) + if not selected_result_id: + st.info("Nessun risultato selezionato o disponibile.") + st.stop() + + # Ottieni i dati del risultato selezionato + selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] + result_data = selected_result_row['results'] + set_name = get_set_name(selected_result_row['set_id']) + questions_results = result_data.get('questions', {}) + + with st.expander("Esporta/Importa Risultati"): + col_exp, col_imp = st.columns(2) + with col_exp: + selected_json = json.dumps({ + 'id': selected_result_row['id'], + 'set_id': selected_result_row['set_id'], + 'timestamp': selected_result_row['timestamp'], + 'results': result_data + }, indent=2) + st.download_button( + "Export Risultato Selezionato", + selected_json, + file_name=f"result_{selected_result_row['id']}.json", + mime="application/json" + ) + + all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2) + st.download_button( + "Export Tutti i Risultati", + all_json, + file_name="all_results.json", + mime="application/json" + ) + + with col_imp: + uploaded_file = st.file_uploader("Seleziona file JSON", type=["json"], key="upload_results") + if uploaded_file is not None: + st.session_state.uploaded_results_file = uploaded_file + st.button( + "Importa Risultati", + on_click=import_results_callback, + key="import_results_btn" + ) + + # Carica eventuale risultato di confronto + compare_result_row = None + compare_result_data = None + compare_questions_results = {} + if compare_result_id: + compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] + compare_result_data = compare_result_row['results'] + compare_questions_results = compare_result_data.get('questions', {}) + + # Visualizza informazioni generali sul risultato + evaluation_method = result_data.get('method', 'LLM') + method_icon = "🤖" if evaluation_method == "LLM" else "📊" + method_desc = "Valutazione LLM" if evaluation_method == "LLM" else "Metodo sconosciuto" + + 
add_section_title(f"Dettaglio Test: {set_name} [{method_icon} {evaluation_method}]", icon="📄") + st.markdown(f"**ID Risultato:** `{selected_result_id}`") + st.markdown(f"**Eseguito il:** {selected_result_row['timestamp']}") + st.markdown(f"**Metodo di Valutazione:** {method_icon} **{method_desc}**") + + if 'generation_llm' in result_data: + st.markdown(f"**LLM Generazione Risposte:** `{result_data['generation_llm']}`") + elif 'generation_preset' in result_data: + st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") + if evaluation_method == "LLM": + if 'evaluation_llm' in result_data: + st.markdown(f"**LLM Valutazione Risposte:** `{result_data['evaluation_llm']}`") + elif 'evaluation_preset' in result_data: + st.markdown( + f"**Preset Valutazione Risposte (LLM):** `{result_data['evaluation_preset']}`" + ) + + # Metriche Generali del Test + add_section_title("Metriche Generali del Test", icon="📈") + + if questions_results: + stats = calculate_statistics(questions_results) + avg_score_overall = stats["avg_score"] + num_questions = len(stats["per_question_scores"]) + + cols_metrics = st.columns(2) + with cols_metrics[0]: + st.metric("Punteggio Medio Complessivo", f"{avg_score_overall:.2f}%") + with cols_metrics[1]: + st.metric("Numero di Domande Valutate", num_questions) + + compare_stats = None + if compare_result_row is not None: + compare_stats = calculate_statistics(compare_questions_results) + compare_avg = compare_stats["avg_score"] + diff_avg = compare_avg - avg_score_overall + st.markdown("### Confronto") + cols_cmp = st.columns(3) + cols_cmp[0].metric("Punteggio Selezionato", f"{avg_score_overall:.2f}%") + cols_cmp[1].metric("Punteggio Confronto", f"{compare_avg:.2f}%") + cols_cmp[2].metric("Differenza", f"{diff_avg:+.2f}%") + + scores_data = [] + for item in stats["per_question_scores"]: + label = item["question"] + label = label[:50] + "..." if len(label) > 50 else label + scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Selezionato"}) + if compare_stats: + for item in compare_stats["per_question_scores"]: + label = item["question"] + label = label[:50] + "..." 
if len(label) > 50 else label + scores_data.append({"Domanda": label, "Punteggio": item["score"], "Tipo": "Confronto"}) + + if scores_data: + df_scores = pd.DataFrame(scores_data) + unique_questions = len({d['Domanda'] for d in scores_data}) + fig = px.bar( + df_scores, + x='Domanda', + y='Punteggio', + color='Tipo', + barmode='group', + title="Punteggi per Domanda", + height=max(400, unique_questions * 30), + ) + fig.update_layout(yaxis_range=[0, 100]) + st.plotly_chart(fig, use_container_width=True) + + if evaluation_method == "LLM": + categories = ['Somiglianza', 'Correttezza', 'Completezza'] + fig_radar = go.Figure() + rm = stats["radar_metrics"] + fig_radar.add_trace( + go.Scatterpolar( + r=[rm['similarity'], rm['correctness'], rm['completeness']], + theta=categories, + fill='toself', + name='Selezionato', + ) + ) + if compare_stats: + crm = compare_stats["radar_metrics"] + fig_radar.add_trace( + go.Scatterpolar( + r=[crm['similarity'], crm['correctness'], crm['completeness']], + theta=categories, + fill='toself', + name='Confronto', + ) + ) + fig_radar.update_layout( + title="Grafico Radar delle Metriche LLM", + polar=dict(radialaxis=dict(visible=True, range=[0, 100])), + showlegend=True, + legend=dict( + orientation="h", + yanchor="bottom", + y=-0.2, + xanchor="center", + x=0.5, + ), + height=600, + ) + st.plotly_chart(fig_radar, use_container_width=True) + + st.subheader("Valori medi delle metriche") + cols = st.columns(3) + cols[0].metric("Somiglianza", f"{rm['similarity']:.2f}%") + cols[1].metric("Correttezza", f"{rm['correctness']:.2f}%") + cols[2].metric("Completezza", f"{rm['completeness']:.2f}%") + + if compare_stats: + cols_cmp = st.columns(3) + cols_cmp[0].metric("Somiglianza (Confronto)", f"{crm['similarity']:.2f}%") + cols_cmp[1].metric("Correttezza (Confronto)", f"{crm['correctness']:.2f}%") + cols_cmp[2].metric("Completezza (Confronto)", f"{crm['completeness']:.2f}%") + else: + st.info("Nessun dettaglio per le domande disponibile in questo risultato.") + + if compare_result_row is not None: + add_section_title("Confronto Dettagliato per Domanda", icon="🔍") + comparison_rows = [] + all_q_ids = set(questions_results.keys()) | set(compare_questions_results.keys()) + for qid in all_q_ids: + q1 = questions_results.get(qid, {}) + q2 = compare_questions_results.get(qid, {}) + label = q1.get('question') or q2.get('question') or str(qid) + score1 = q1.get('evaluation', {}).get('score', None) + score2 = q2.get('evaluation', {}).get('score', None) + delta = None + if score1 is not None and score2 is not None: + delta = score2 - score1 + comparison_rows.append({ + 'Domanda': label[:50] + ('...' if len(label) > 50 else ''), + 'Selezionato': score1, + 'Confronto': score2, + 'Delta': delta + }) + if comparison_rows: + df_comp = pd.DataFrame(comparison_rows) + st.dataframe(df_comp) + + # Dettagli per ogni domanda + add_section_title("Risultati Dettagliati per Domanda", icon="📝") + if not questions_results: + st.info("Nessuna domanda trovata in questo set di risultati.") + else: + for q_id, q_data in questions_results.items(): + question_text = q_data.get('question', "Testo domanda non disponibile") + expected_answer = q_data.get('expected_answer', "Risposta attesa non disponibile") + actual_answer = q_data.get('actual_answer', "Risposta effettiva non disponibile") + + with st.expander( + f"Domanda: {question_text[:100]}..." 
+ ): + st.markdown(f"**Domanda:** {question_text}") + st.markdown(f"**Risposta Attesa:** {expected_answer}") + st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") + st.divider() + + # Mostra Dettagli API di Generazione (se presenti e richiesti) + generation_api_details = q_data.get('generation_api_details') + if generation_api_details and isinstance(generation_api_details, dict): + with st.container(): + st.markdown("###### Dettagli Chiamata API di Generazione Risposta") + if generation_api_details.get('request'): + st.caption("Richiesta API Generazione:") + st.json( + generation_api_details['request'], expanded=False + ) + if generation_api_details.get('response_content'): + st.caption("Contenuto Risposta API Generazione:") + # Prova a formattare se è una stringa JSON, altrimenti mostra com'è + try: + content = generation_api_details['response_content'] + if isinstance(content, str): + response_data_gen = json.loads(content) + else: + response_data_gen = content + st.code( + json.dumps(response_data_gen, indent=2), + language="json", + ) + except Exception: + st.text( + generation_api_details['response_content'] + ) + if generation_api_details.get('error'): + st.caption("Errore API Generazione:") + st.error(generation_api_details['error']) + st.divider() + + if evaluation_method == "LLM": + evaluation = q_data.get( + 'evaluation', {} + ) # Assicurati che evaluation sia sempre un dizionario + st.markdown("##### Valutazione LLM") + score = evaluation.get('score', 0) + explanation = evaluation.get( + 'explanation', "Nessuna spiegazione." + ) + similarity = evaluation.get('similarity', 0) + correctness = evaluation.get('correctness', 0) + completeness = evaluation.get('completeness', 0) + + st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") + st.markdown(f"**Spiegazione:** {explanation}") + + cols_eval_metrics = st.columns(3) + cols_eval_metrics[0].metric( + "Somiglianza", f"{similarity:.2f}%" + ) + cols_eval_metrics[1].metric( + "Correttezza", f"{correctness:.2f}%" + ) + cols_eval_metrics[2].metric( + "Completezza", f"{completeness:.2f}%" + ) + + api_details = evaluation.get('api_details') + if api_details and isinstance(api_details, dict): + with st.container(): # Sostituisce l'expander interno + st.markdown( + "###### Dettagli Chiamata API di Valutazione" + ) + if api_details.get('request'): + st.caption("Richiesta API:") + st.json(api_details['request'], expanded=False) + if api_details.get('response_content'): + st.caption("Contenuto Risposta API:") + content = api_details['response_content'] + parsed = json.loads(content) if isinstance( + content, str + ) else content + st.code( + json.dumps(parsed, indent=2), + language="json", + ) + if api_details.get('error'): + st.caption("Errore API:") + st.error(api_details['error']) + + st.markdown("--- --- ---") From 8c08a6b4d1da9c0f2d59570f5f8ff753b8d66cf5 Mon Sep 17 00:00:00 2001 From: oniichan Date: Thu, 7 Aug 2025 13:27:08 +0200 Subject: [PATCH 06/41] fixed port issue --- docker-compose.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f0920a1..64a3b52 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,11 +2,8 @@ services: db: image: mysql:8.0 container_name: db - restart: always - ports: - - "3306:3306" volumes: - - ./data/:/var/lib/mysql + - db_data:/var/lib/mysql environment: MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' MYSQL_ROOT_HOST: '%' @@ -28,5 +25,6 @@ services: volumes: db_data: + networks: llm-network: From 
827232eb2f6e8c1f659d58adee0b27a1d05129c7 Mon Sep 17 00:00:00 2001 From: oniichan Date: Thu, 7 Aug 2025 13:41:14 +0200 Subject: [PATCH 07/41] modificato il testo --- views/esecuzione_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py index a45cb9c..5c4bd4a 100644 --- a/views/esecuzione_test.py +++ b/views/esecuzione_test.py @@ -153,8 +153,8 @@ def get_preset_config_by_name(name): st.session_state.results = exec_result['results_df'] st.success(f"Test LLM completato! Punteggio medio: {exec_result['avg_score']:.2f}%") - # Visualizzazione risultati dettagliati - st.subheader("Risultati Dettagliati") + # Visualizzazione risultati + st.subheader("Risultati riassuntivi del test(Per visualizzare tutti i dettagli, vai alla pagina Visualizza Risultati)") for q_id, result in exec_result['results'].items(): with st.expander( f"Domanda: {result['question'][:50]}..." From 55397db9b84df7ddbb67ace8bd21c34f76512682 Mon Sep 17 00:00:00 2001 From: Marco Cola Date: Thu, 7 Aug 2025 16:51:30 +0200 Subject: [PATCH 08/41] Update README.md --- README.md | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4dc2eca..0c78c0d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,161 @@ -# MVP -Repository per il Minimum Viable Product della fase PB +# Artificial QI – Manuale di Avvio 🧠⚙️ + +Piattaforma sviluppata dal gruppo **7Commits** per la valutazione delle prestazioni di LLM (Large Language Models). + +## 👥 Componenti del Gruppo: +- **Marco Cola** - 2079237 +- **Ruize Lin** - 2068236 +- **Stefano Dal Poz** - 1204683 +- **Giulia Hu** - 2009118 +- **Mattia Piva** - 2008065 +- **Giada Rossi** - 2045353 + +--- + +## 📦 Requisiti + +> Puoi eseguire la webapp **con Docker** (consigliato) oppure **con Python + MySQL** in locale. + +### 🔧 Requisiti comuni + +- [x] **Git** +- [x] File `db.config` (creato da `db.config.example`) + +--- + +### 📦 Requisiti per l’**esecuzione con Docker** (consigliato) + +- [x] Docker ≥ 20.10 +- [x] Docker Compose + +### 🐍 Requisiti per l’**esecuzione con Python** + +- [x] Python ≥ 3.10 +- [x] MySQL ≥ 5.7 +- [x] `pip install -r requirements.txt` + +--- + +## 📁 Clonazione del progetto + +```bash +git clone https://github.com/7Commits/MVP +cd MVP +cp db.config.example db.config + +``` + +## 🐳 Avvio con Docker + +Assicurati che il file db.config contenga: +```bash +[mysql] +user = root +password = root +host = db +port = 3306 +database = llm_platform +``` + +poi esegui: +```bash +docker compose up -d --build +``` + +infine accedi alla webapp: +```bash +http://localhost:8501 +``` + +## 🛑 Arresto dell'app (Docker) + +Per mettere in pausa: + +```bash +docker compose stop +``` + +Per spegnere e rimuovere: + +```bash +docker compose down +``` + +## 🐍 Avvio con Python + MySQL locale + +1. Avvia il tuo server MySQL locale (con porta, user e password compatibili) + +2. Modifica db.config così: +```bash +[mysql] +user = tuo_user +password = tua_password +host = localhost +port = 3306 +database = llm_platform +``` +3. Installa le dipendenze Python: +```bash +pip install -r requirements.txt +``` + +4. Avvia l'app con: +```bash +python -m streamlit run app.py --server.port 8501 +``` + +# 🧪 Guida all’Uso dell’Applicazione + +## 🏠 Pagina Home + +La pagina iniziale della webapp mostra una descrizione dell’app e un menu laterale con le varie sezioni disponibili. 
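
A minimal optional pre-flight check, assuming the repository's `initialize_db.py` helper and a reachable MySQL instance configured in `db.config`: the `llm_platform` tables can also be created manually before the first launch (the app initializes them at startup as well).

```bash
# Optional: create the llm_platform tables up front instead of waiting for app startup
python initialize_db.py
```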
+ +--- + +## ⚙️ Configurazione API + +In questa sezione puoi creare dei **preset** di connessione per i vari LLM, inserendo: + +- Nome del preset +- Chiave API +- Endpoint del provider (URL) +- Modello da utilizzare +- Temperatura di generazione + +Puoi salvare, modificare o eliminare preset già configurati. + +--- + +## ❓ Gestione Domande + +Qui puoi gestire le **domande e risposte attese**: + +- Inserire manualmente domanda e risposta +- Modificare o eliminare una voce +- Importare un file `.csv` o `.json` con domande e risposte attese + +### 📄 Formato richiesto per importazione + +```csv +domanda,risposta_attesa,categoria (opzionale) +Qual è la capitale d'Italia?,Roma,Geografia +Chi ha scritto '1984'?,George Orwell,Letteratura +``` + +## 💬 Supporto tecnico + +In caso di problemi o domande, contattare: + +- 📧 Email: [7commits@gmail.com](mailto:7commits@gmail.com) + +Inserendo eventuali messaggi di errore e una breve descrizione del problema. + + + + + + + + + + From 5e61940e641d45f81135bf04df5c8e295a431a4a Mon Sep 17 00:00:00 2001 From: Marco Cola Date: Thu, 7 Aug 2025 17:02:42 +0200 Subject: [PATCH 09/41] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0c78c0d..5125318 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Assicurati che il file db.config contenga: ```bash [mysql] user = root -password = root +password = host = db port = 3306 database = llm_platform From d4a4acf3d6c48daa6c7be6bca88bdd0dceccbe5a Mon Sep 17 00:00:00 2001 From: Marco Cola Date: Thu, 7 Aug 2025 17:05:04 +0200 Subject: [PATCH 10/41] Update README.md --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 5125318..292bd04 100644 --- a/README.md +++ b/README.md @@ -50,11 +50,11 @@ cp db.config.example db.config Assicurati che il file db.config contenga: ```bash [mysql] -user = root -password = -host = db -port = 3306 -database = llm_platform +host=db +user=root +password= +database=llm_platform +port=3306 ``` poi esegui: @@ -88,11 +88,12 @@ docker compose down 2. Modifica db.config così: ```bash [mysql] -user = tuo_user -password = tua_password -host = localhost -port = 3306 -database = llm_platform +host=localhost +user=root +password=your_password_here +database=llm_platform +port=3306 +ssl_ca= ``` 3. Installa le dipendenze Python: ```bash From 45ea95578f2da34e120d525d7be6c662a1fa8567 Mon Sep 17 00:00:00 2001 From: Marco Cola Date: Thu, 7 Aug 2025 17:16:07 +0200 Subject: [PATCH 11/41] Update README.md --- README.md | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 292bd04..bce4c31 100644 --- a/README.md +++ b/README.md @@ -131,18 +131,99 @@ Puoi salvare, modificare o eliminare preset già configurati. Qui puoi gestire le **domande e risposte attese**: -- Inserire manualmente domanda e risposta +- Inserire manualmente domanda e risposta, oppure set di domande e risposte - Modificare o eliminare una voce - Importare un file `.csv` o `.json` con domande e risposte attese -### 📄 Formato richiesto per importazione +### 📄 Esempio di formato richiesto per importazione domande e risposte CSV +Deve includere le colonne 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. 
+```csv +domanda,risposta_attesa,categoria +"Quanto fa 2+2?","4","Matematica Base" +"Qual è la capitale della Francia?","Parigi","Geografia" +"Chi ha scritto 'Amleto'?","William Shakespeare","Letteratura" +``` +### 📄 Esempio di formato richiesto per importazione domande e risposte JSON +Deve contenere un array di oggetti con i campi 'domanda' e 'risposta_attesa'. Può includere opzionalmente 'categoria'. +```json +[ + { + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica Base" + }, + { + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "domanda": "Chi ha scritto 'Romeo e Giulietta'?", + "risposta_attesa": "William Shakespeare" + } +] +``` + +### 📄 Esempio di formato richiesto per importazione set di domande e risposte CSV +Ogni riga deve contenere le colonne name (nome del set), id (ID della domanda), domanda (testo), risposta_attesa e categoria. ```csv -domanda,risposta_attesa,categoria (opzionale) -Qual è la capitale d'Italia?,Roma,Geografia -Chi ha scritto '1984'?,George Orwell,Letteratura +name,id,domanda,risposta_attesa,categoria +Capitali,1,Qual è la capitale della Francia?,Parigi,Geografia +Capitali,2,Qual è la capitale della Germania?,Berlino,Geografia +Matematica Base,3,Quanto fa 2+2?,4,Matematica +Matematica Base,4,Quanto fa 10*4?,40,Matematica ``` + +### 📄 Esempio di formato richiesto per importazione set di domande e risposte JSON + +```json +[ + { + "name": "Capitali", + "questions": [ + { + "id": "1", + "domanda": "Qual è la capitale della Francia?", + "risposta_attesa": "Parigi", + "categoria": "Geografia" + }, + { + "id": "2", + "domanda": "Qual è la capitale della Germania?", + "risposta_attesa": "Berlino", + "categoria": "Geografia" + } + ] + }, + { + "name": "Matematica Base", + "questions": [ + { + "id": "3", + "domanda": "Quanto fa 2+2?", + "risposta_attesa": "4", + "categoria": "Matematica" + }, + { + "id": "4", + "domanda": "Quanto fa 10*4?", + "risposta_attesa": "40", + "categoria": "Matematica" + } + ] + } +] +``` +#### Note importazione: + +- Se una domanda con lo stesso ID esiste già, non verrà aggiunta nuovamente +- Se un set con lo stesso nome esiste già, verrà saltato +- Solo le domande nuove verranno aggiunte al database +- Le domande esistenti verranno referenziate nei nuovi set + + ## 💬 Supporto tecnico In caso di problemi o domande, contattare: From 27f3409216df8c88fb4555023778953fa947f1f5 Mon Sep 17 00:00:00 2001 From: Marco Cola Date: Thu, 7 Aug 2025 17:19:05 +0200 Subject: [PATCH 12/41] Update README.md --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bce4c31..fe4858f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Artificial QI – Manuale di Avvio 🧠⚙️ +# Artificial QI – Minimum Viable Product🧠⚙️ Piattaforma sviluppata dal gruppo **7Commits** per la valutazione delle prestazioni di LLM (Large Language Models). 
@@ -63,9 +63,8 @@ docker compose up -d --build ``` infine accedi alla webapp: -```bash -http://localhost:8501 -``` +[localhost:8501](http://localhost:8501) + ## 🛑 Arresto dell'app (Docker) From c087ba2b0d0b77ea109781558b5f225c2b8ae58c Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 8 Aug 2025 10:51:46 +0200 Subject: [PATCH 13/41] some changes --- controllers/openai_client.py | 3 +- controllers/test_controller.py | 74 +++------------------ tests/test_evaluate_answer.py | 3 +- tests/test_openai_controllers.py | 8 +-- views/esecuzione_test.py | 7 +- views/visualizza_risultati.py | 110 ++++++++----------------------- 6 files changed, 45 insertions(+), 160 deletions(-) diff --git a/controllers/openai_client.py b/controllers/openai_client.py index 270c60f..92d5d96 100644 --- a/controllers/openai_client.py +++ b/controllers/openai_client.py @@ -57,7 +57,8 @@ def get_available_models_for_endpoint( if not client: return ["(Errore creazione client API)", DEFAULT_MODEL] try: - models = client.models.list() + models_response = client.models.list() + models = getattr(models_response, "data", models_response) filtered_models = sorted( [ model.id diff --git a/controllers/test_controller.py b/controllers/test_controller.py index a3748e9..6b7c0db 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -149,7 +149,6 @@ def evaluate_answer( expected_answer: str, actual_answer: str, client_config: dict, - show_api_details: bool = False, ): """Valuta una risposta utilizzando un LLM specificato tramite client_config.""" @@ -202,29 +201,19 @@ def evaluate_answer( "response_format": {"type": "json_object"}, } - api_details_for_log = {} - if show_api_details: - api_details_for_log["request"] = api_request_details.copy() - try: response = client.chat.completions.create(**api_request_details) choices = getattr(response, "choices", None) if not choices: logger.error("Risposta API priva di 'choices' validi") - if show_api_details: - api_details_for_log["response_content"] = "" return { "score": 0, "explanation": "Errore: risposta API non valida.", "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log, } content = choices[0].message.content or "{}" - if show_api_details: - api_details_for_log["response_content"] = content - try: evaluation = json.loads(content) required_keys = [ @@ -245,8 +234,6 @@ def evaluate_answer( if key != "explanation" else "Valutazione incompleta o formato JSON non corretto." 
) - - evaluation["api_details"] = api_details_for_log return evaluation except json.JSONDecodeError: logger.error( @@ -258,37 +245,32 @@ def evaluate_answer( "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log, } except (APIConnectionError, RateLimitError, APIStatusError) as e: logger.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") - api_details_for_log["error"] = str(e) return { "score": 0, "explanation": f"Errore API: {type(e).__name__}", "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log, } except Exception as exc: # noqa: BLE001 logger.error( f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" ) - api_details_for_log["error"] = str(exc) return { "score": 0, "explanation": f"Errore imprevisto: {type(exc).__name__}", "similarity": 0, "correctness": 0, "completeness": 0, - "api_details": api_details_for_log, } def generate_example_answer_with_llm( - question: str, client_config: dict, show_api_details: bool = False + question: str, client_config: dict ): """Genera una risposta di esempio per una domanda utilizzando un LLM.""" @@ -298,21 +280,11 @@ def generate_example_answer_with_llm( ) if not client: logger.error("Client API per la generazione risposte non configurato.") - return { - "answer": None, - "api_details": {"error": "Client API non configurato"} - if show_api_details - else None, - } + return {"answer": None, "error": "Client API non configurato"} if question is None or not isinstance(question, str) or question.strip() == "": logger.error("La domanda fornita è vuota o non valida.") - return { - "answer": None, - "api_details": {"error": "Domanda vuota o non valida"} - if show_api_details - else None, - } + return {"answer": None, "error": "Domanda vuota o non valida"} prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" @@ -323,10 +295,6 @@ def generate_example_answer_with_llm( "max_tokens": client_config.get("max_tokens", 500), } - api_details_for_log = {} - if show_api_details: - api_details_for_log["request"] = api_request_details.copy() - try: response = client.chat.completions.create(**api_request_details) answer = ( @@ -334,37 +302,18 @@ def generate_example_answer_with_llm( if response.choices and response.choices[0].message.content else None ) - if show_api_details: - api_details_for_log["response_content"] = ( - response.choices[0].message.content - if response.choices - else "Nessun contenuto" - ) - return { - "answer": answer, - "api_details": api_details_for_log if show_api_details else None, - } + return {"answer": answer} except (APIConnectionError, RateLimitError, APIStatusError) as e: logger.error( f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" ) - if show_api_details: - api_details_for_log["error"] = str(e) - return { - "answer": None, - "api_details": api_details_for_log if show_api_details else None, - } + return {"answer": None, "error": str(e)} except Exception as exc: # noqa: BLE001 logger.error( f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" ) - if show_api_details: - api_details_for_log["error"] = str(exc) - return { - "answer": None, - "api_details": api_details_for_log if show_api_details else None, - } + return {"answer": None, "error": str(exc)} def execute_llm_test( @@ -373,7 +322,6 @@ def execute_llm_test( question_ids: List[str], gen_preset_config: Dict, eval_preset_config: Dict, - show_api_details: bool = False, ) 
-> Dict: """Esegue la generazione e valutazione delle risposte tramite LLM.""" @@ -403,18 +351,16 @@ def get_question_data(qid: str): generation_output = generate_example_answer_with_llm( q_data["question"], client_config=gen_preset_config, - show_api_details=show_api_details, ) actual_answer = generation_output.get("answer") - generation_api_details = generation_output.get("api_details") if actual_answer is None: + error_msg = generation_output.get("error", "Generazione fallita") results[q_id] = { "question": q_data["question"], "expected_answer": q_data["expected_answer"], - "actual_answer": "Errore Generazione", - "evaluation": {"score": 0, "explanation": "Generazione fallita"}, - "generation_api_details": generation_api_details, + "actual_answer": error_msg, + "evaluation": {"score": 0, "explanation": error_msg}, } continue @@ -423,14 +369,12 @@ def get_question_data(qid: str): q_data["expected_answer"], actual_answer, client_config=eval_preset_config, - show_api_details=show_api_details, ) results[q_id] = { "question": q_data["question"], "expected_answer": q_data["expected_answer"], "actual_answer": actual_answer, "evaluation": evaluation, - "generation_api_details": generation_api_details, } if not results: diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py index 94615b9..fd7b9f9 100644 --- a/tests/test_evaluate_answer.py +++ b/tests/test_evaluate_answer.py @@ -41,12 +41,11 @@ def test_evaluate_answer_success(mock_get_client): ) result = test_controller.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"}, show_api_details=True + "q", "expected", "actual", {"api_key": "key"} ) assert result["score"] == 90 assert result["similarity"] == 90 - assert "api_details" in result @patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) diff --git a/tests/test_openai_controllers.py b/tests/test_openai_controllers.py index b916773..a7d5a70 100644 --- a/tests/test_openai_controllers.py +++ b/tests/test_openai_controllers.py @@ -34,11 +34,11 @@ def test_generate_example_answer_success(mock_get_client): @patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) def test_generate_example_answer_no_client(mock_get_client): result = test_controller.generate_example_answer_with_llm( - "question", {"api_key": None}, show_api_details=True + "question", {"api_key": None} ) assert result["answer"] is None - assert result["api_details"]["error"] == "Client API non configurato" + assert result["error"] == "Client API non configurato" @patch("controllers.test_controller.openai_client.get_openai_client") @@ -46,11 +46,11 @@ def test_generate_example_answer_empty_question(mock_get_client): mock_get_client.return_value = Mock() result = test_controller.generate_example_answer_with_llm( - "", {"api_key": "key"}, show_api_details=True + "", {"api_key": "key"} ) assert result["answer"] is None - assert result["api_details"]["error"] == "Domanda vuota o non valida" + assert result["error"] == "Domanda vuota o non valida" @patch("controllers.api_preset_controller.openai_client.get_openai_client") diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py index 5c4bd4a..54837b7 100644 --- a/views/esecuzione_test.py +++ b/views/esecuzione_test.py @@ -114,8 +114,6 @@ def get_preset_config_by_name(name): ) st.session_state.selected_evaluation_preset_name = evaluation_preset_name - show_api_details = st.checkbox("Mostra Dettagli Chiamate API nei Risultati", value=False) - # --- Logica di Esecuzione Test --- 
test_mode_selected = st.session_state.test_mode @@ -146,15 +144,14 @@ def get_preset_config_by_name(name): questions_in_set, gen_preset_config, eval_preset_config, - show_api_details=show_api_details, ) if exec_result: st.session_state.results = exec_result['results_df'] st.success(f"Test LLM completato! Punteggio medio: {exec_result['avg_score']:.2f}%") - # Visualizzazione risultati - st.subheader("Risultati riassuntivi del test(Per visualizzare tutti i dettagli, vai alla pagina Visualizza Risultati)") + # Visualizzazione risultati dettagliati + st.subheader("Risultati Dettagliati") for q_id, result in exec_result['results'].items(): with st.expander( f"Domanda: {result['question'][:50]}..." diff --git a/views/visualizza_risultati.py b/views/visualizza_risultati.py index d7f043a..d559a3e 100644 --- a/views/visualizza_risultati.py +++ b/views/visualizza_risultati.py @@ -233,7 +233,6 @@ def import_results_callback(): st.markdown(f"**LLM Generazione Risposte:** `{result_data['generation_llm']}`") elif 'generation_preset' in result_data: st.markdown(f"**Preset Generazione Risposte:** `{result_data['generation_preset']}`") - if evaluation_method == "LLM": if 'evaluation_llm' in result_data: st.markdown(f"**LLM Valutazione Risposte:** `{result_data['evaluation_llm']}`") elif 'evaluation_preset' in result_data: @@ -292,7 +291,6 @@ def import_results_callback(): fig.update_layout(yaxis_range=[0, 100]) st.plotly_chart(fig, use_container_width=True) - if evaluation_method == "LLM": categories = ['Somiglianza', 'Correttezza', 'Completezza'] fig_radar = go.Figure() rm = stats["radar_metrics"] @@ -384,86 +382,32 @@ def import_results_callback(): st.markdown(f"**Risposta Generata/Effettiva:** {actual_answer}") st.divider() - # Mostra Dettagli API di Generazione (se presenti e richiesti) - generation_api_details = q_data.get('generation_api_details') - if generation_api_details and isinstance(generation_api_details, dict): - with st.container(): - st.markdown("###### Dettagli Chiamata API di Generazione Risposta") - if generation_api_details.get('request'): - st.caption("Richiesta API Generazione:") - st.json( - generation_api_details['request'], expanded=False - ) - if generation_api_details.get('response_content'): - st.caption("Contenuto Risposta API Generazione:") - # Prova a formattare se è una stringa JSON, altrimenti mostra com'è - try: - content = generation_api_details['response_content'] - if isinstance(content, str): - response_data_gen = json.loads(content) - else: - response_data_gen = content - st.code( - json.dumps(response_data_gen, indent=2), - language="json", - ) - except Exception: - st.text( - generation_api_details['response_content'] - ) - if generation_api_details.get('error'): - st.caption("Errore API Generazione:") - st.error(generation_api_details['error']) - st.divider() - - if evaluation_method == "LLM": - evaluation = q_data.get( - 'evaluation', {} - ) # Assicurati che evaluation sia sempre un dizionario - st.markdown("##### Valutazione LLM") - score = evaluation.get('score', 0) - explanation = evaluation.get( - 'explanation', "Nessuna spiegazione." 
- ) - similarity = evaluation.get('similarity', 0) - correctness = evaluation.get('correctness', 0) - completeness = evaluation.get('completeness', 0) - - st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") - st.markdown(f"**Spiegazione:** {explanation}") - - cols_eval_metrics = st.columns(3) - cols_eval_metrics[0].metric( - "Somiglianza", f"{similarity:.2f}%" - ) - cols_eval_metrics[1].metric( - "Correttezza", f"{correctness:.2f}%" - ) - cols_eval_metrics[2].metric( - "Completezza", f"{completeness:.2f}%" - ) - - api_details = evaluation.get('api_details') - if api_details and isinstance(api_details, dict): - with st.container(): # Sostituisce l'expander interno - st.markdown( - "###### Dettagli Chiamata API di Valutazione" - ) - if api_details.get('request'): - st.caption("Richiesta API:") - st.json(api_details['request'], expanded=False) - if api_details.get('response_content'): - st.caption("Contenuto Risposta API:") - content = api_details['response_content'] - parsed = json.loads(content) if isinstance( - content, str - ) else content - st.code( - json.dumps(parsed, indent=2), - language="json", - ) - if api_details.get('error'): - st.caption("Errore API:") - st.error(api_details['error']) + + evaluation = q_data.get( + 'evaluation', {} + ) # Assicurati che evaluation sia sempre un dizionario + st.markdown("##### Valutazione LLM") + score = evaluation.get('score', 0) + explanation = evaluation.get( + 'explanation', "Nessuna spiegazione." + ) + similarity = evaluation.get('similarity', 0) + correctness = evaluation.get('correctness', 0) + completeness = evaluation.get('completeness', 0) + + st.markdown(f"**Punteggio Complessivo:** {score:.2f}%") + st.markdown(f"**Spiegazione:** {explanation}") + + cols_eval_metrics = st.columns(3) + cols_eval_metrics[0].metric( + "Somiglianza", f"{similarity:.2f}%" + ) + cols_eval_metrics[1].metric( + "Correttezza", f"{correctness:.2f}%" + ) + cols_eval_metrics[2].metric( + "Completezza", f"{completeness:.2f}%" + ) + st.markdown("--- --- ---") From e04ee78ff66e7bb87d986bd4bd33b2d22c06036a Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 8 Aug 2025 13:35:29 +0200 Subject: [PATCH 14/41] some changes --- controllers/startup_controller.py | 8 ++-- models/api_preset.py | 8 ++-- models/db_utils.py | 75 ------------------------------- models/orm_models.py | 2 +- models/question.py | 10 ++--- models/question_set.py | 10 ++--- models/test_result.py | 8 ++-- views/gestione_domande.py | 9 ++-- 8 files changed, 27 insertions(+), 103 deletions(-) delete mode 100644 models/db_utils.py diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py index f6f0461..a40ba9f 100644 --- a/controllers/startup_controller.py +++ b/controllers/startup_controller.py @@ -1,7 +1,7 @@ import logging import os -from models.db_utils import init_db +from models.database import DatabaseEngine from controllers.question_controller import load_questions from controllers.question_set_controller import load_sets from controllers.test_controller import load_results @@ -11,7 +11,7 @@ def setup_logging(level: int = logging.INFO) -> None: - """Configura il logger root con un formato di base.""" + """Configura il logger radice con un formato di base.""" logging.basicConfig( level=level, format="%(asctime)s - %(levelname)s - %(message)s", @@ -19,8 +19,8 @@ def setup_logging(level: int = logging.INFO) -> None: def get_initial_state() -> dict: - """Inizializza il database e restituisce lo stato di default dell'applicazione.""" - init_db() + """Inizializza il database e 
restituisce lo stato predefinito dell'applicazione.""" + DatabaseEngine.instance().init_db() return { "questions": load_questions(), "question_sets": load_sets(), diff --git a/models/api_preset.py b/models/api_preset.py index 92fd24f..edca062 100644 --- a/models/api_preset.py +++ b/models/api_preset.py @@ -5,7 +5,7 @@ import pandas as pd from sqlalchemy import select -from models.db_utils import get_session +from models.database import DatabaseEngine from models.orm_models import APIPresetORM logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class APIPreset: @staticmethod def load_all() -> List["APIPreset"]: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: presets = session.execute(select(APIPresetORM)).scalars().all() return [ APIPreset( @@ -41,7 +41,7 @@ def load_all() -> List["APIPreset"]: @staticmethod def save_df(df: pd.DataFrame) -> None: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: existing_ids = session.execute(select(APIPresetORM.id)).scalars().all() incoming_ids = df['id'].astype(str).tolist() for del_id in set(existing_ids) - set(incoming_ids): @@ -65,7 +65,7 @@ def save_df(df: pd.DataFrame) -> None: @staticmethod def delete(preset_id: str) -> None: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: obj = session.get(APIPresetORM, preset_id) if obj: session.delete(obj) diff --git a/models/db_utils.py b/models/db_utils.py deleted file mode 100644 index 14c2524..0000000 --- a/models/db_utils.py +++ /dev/null @@ -1,75 +0,0 @@ -import logging - -import configparser -from pathlib import Path -from sqlalchemy import create_engine, text -from sqlalchemy.orm import declarative_base, sessionmaker -logger = logging.getLogger(__name__) - - -def _ensure_database(cfg): - """Crea il database di destinazione se non esiste.""" - root_url = ( - f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}" - ) - engine = create_engine(root_url, isolation_level="AUTOCOMMIT") - try: - with engine.begin() as conn: - conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) - except Exception as exc: - logger.exception( - "Impossibile creare il database '%s' sull'host '%s' con l'utente '%s'", - cfg.get('database'), - cfg.get('host'), - cfg.get('user'), - ) - raise RuntimeError( - f"Impossibile creare il database '{cfg.get('database')}' sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. 
" - "Il server del database potrebbe essere irraggiungibile, le credenziali potrebbero non essere valide " - "oppure l'utente potrebbe non avere privilegi sufficienti.", - ) from exc - - -Base = declarative_base() -_engine = None -_SessionFactory = None - - -def get_engine(): - """Restituisce un'istanza di motore SQLAlchemy.""" - global _engine - if _engine is None: - config = configparser.ConfigParser() - root = Path(__file__).resolve().parent.parent - cfg_path = root / 'db.config' - if not cfg_path.exists(): - cfg_path = root / 'db.config.example' - config.read(cfg_path) - cfg = config['mysql'] - _ensure_database(cfg) - url = ( - f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}/{cfg['database']}" - ) - _engine = create_engine( - url, - pool_pre_ping=True, # Verifica che le connessioni siano attive - pool_recycle=3600, # Ricicla le connessioni inattive per evitare timeout - ) - return _engine - - -def get_session(): - """Restituisce una nuova sessione ORM.""" - global _SessionFactory - engine = get_engine() - if _SessionFactory is None: - _SessionFactory = sessionmaker(bind=engine) - return _SessionFactory() - - -def init_db(): - """Crea le tabelle necessarie se non esistono.""" - engine = get_engine() - # Assicura che tutti i modelli ORM siano registrati - import models.orm_models # noqa: F401 - Base.metadata.create_all(engine) diff --git a/models/orm_models.py b/models/orm_models.py index 07b85df..7f4b6fe 100644 --- a/models/orm_models.py +++ b/models/orm_models.py @@ -7,7 +7,7 @@ from sqlalchemy import Column, String, Text, Float, Integer, ForeignKey, Table, JSON from sqlalchemy.orm import relationship -from .db_utils import Base +from .database import Base logger = logging.getLogger(__name__) # Tabella di associazione per la relazione molti-a-molti tra set e domande diff --git a/models/question.py b/models/question.py index 7c16e30..1bf1c6c 100644 --- a/models/question.py +++ b/models/question.py @@ -5,7 +5,7 @@ import uuid from sqlalchemy import select, delete -from models.db_utils import get_session +from models.database import DatabaseEngine from models.orm_models import QuestionORM, question_set_questions logger = logging.getLogger(__name__) @@ -19,7 +19,7 @@ class Question: @staticmethod def load_all() -> List["Question"]: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: results = session.execute(select(QuestionORM)).scalars().all() return [ Question( @@ -34,7 +34,7 @@ def load_all() -> List["Question"]: @staticmethod def add(domanda: str, risposta_attesa: str, categoria: str = "", question_id: Optional[str] = None) -> str: qid = question_id or str(uuid.uuid4()) - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: session.add( QuestionORM( id=qid, @@ -58,7 +58,7 @@ def update( Restituisce ``True`` se l'aggiornamento è andato a buon fine, ``False`` se la domanda non esiste. 
""" - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: q = session.get(QuestionORM, question_id) if not q: return False @@ -73,7 +73,7 @@ def update( @staticmethod def delete(question_id: str) -> None: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: session.execute( delete(question_set_questions).where(question_set_questions.c.question_id == question_id) ) diff --git a/models/question_set.py b/models/question_set.py index 156152f..77cdcd9 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -5,7 +5,7 @@ import uuid from sqlalchemy import select -from models.db_utils import get_session +from models.database import DatabaseEngine from models.orm_models import QuestionSetORM, QuestionORM logger = logging.getLogger(__name__) @@ -18,7 +18,7 @@ class QuestionSet: @staticmethod def load_all() -> List["QuestionSet"]: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: sets = session.execute(select(QuestionSetORM)).scalars().all() return [ QuestionSet( @@ -33,7 +33,7 @@ def load_all() -> List["QuestionSet"]: def create(name: str, question_ids: Optional[List[str]] = None) -> str: set_id = str(uuid.uuid4()) q_ids = [str(q) for q in (question_ids or [])] - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: qs = [] for qid in q_ids: q_obj = session.get(QuestionORM, qid) @@ -46,7 +46,7 @@ def create(name: str, question_ids: Optional[List[str]] = None) -> str: @staticmethod def update(set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None) -> None: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: qset = session.get(QuestionSetORM, set_id) if not qset: return @@ -63,7 +63,7 @@ def update(set_id: str, name: Optional[str] = None, question_ids: Optional[List[ @staticmethod def delete(set_id: str) -> None: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: qset = session.get(QuestionSetORM, set_id) if qset: session.delete(qset) diff --git a/models/test_result.py b/models/test_result.py index 97c2d4c..7b21961 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -7,7 +7,7 @@ import pandas as pd from sqlalchemy import select -from models.db_utils import get_session +from models.database import DatabaseEngine from models.orm_models import TestResultORM logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ class TestResult: @staticmethod def load_all() -> List["TestResult"]: - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: results = session.execute(select(TestResultORM)).scalars().all() return [ TestResult( @@ -40,7 +40,7 @@ def save_df(df: pd.DataFrame) -> None: df_to_save['results'] = df_to_save['results'].apply( lambda x: json.dumps(x) if isinstance(x, dict) else '{}' ) - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: existing_ids = session.execute(select(TestResultORM.id)).scalars().all() incoming_ids = df_to_save['id'].astype(str).tolist() for rid in set(existing_ids) - set(incoming_ids): @@ -68,7 +68,7 @@ def save_df(df: pd.DataFrame) -> None: @staticmethod def add(set_id: str, results_data: Dict) -> str: result_id = str(uuid.uuid4()) - with get_session() as session: + with DatabaseEngine.instance().get_session() as session: session.add( TestResultORM( id=result_id, diff --git a/views/gestione_domande.py 
b/views/gestione_domande.py index e25023a..a51a37f 100644 --- a/views/gestione_domande.py +++ b/views/gestione_domande.py @@ -9,7 +9,6 @@ delete_question, filter_questions_by_category, load_questions, - refresh_questions, import_questions_from_file, ) from views.style_utils import add_page_header @@ -32,7 +31,7 @@ def save_question_action( categoria=edited_category, ): state.save_success = True - st.session_state.questions = refresh_questions() + st.session_state.questions = load_questions() state.trigger_rerun = True else: state.save_error = True @@ -55,7 +54,7 @@ def delete_question_action(question_id) -> QuestionPageState: state = QuestionPageState() delete_question(question_id) state.delete_success = True - st.session_state.questions = refresh_questions() + st.session_state.questions = load_questions() state.trigger_rerun = True return state @@ -68,7 +67,7 @@ def import_questions_action(uploaded_file) -> QuestionPageState: if success: state.import_success = True state.import_success_message = message - st.session_state.questions = refresh_questions() + st.session_state.questions = load_questions() state.trigger_rerun = True else: state.import_error = True @@ -238,7 +237,7 @@ def render(): ) state.trigger_rerun = True st.session_state.question_page_state = state - st.session_state.questions = refresh_questions() + st.session_state.questions = load_questions() st.rerun() else: st.error("Sono necessarie sia la domanda che la risposta attesa.") From e7bb6662c74f4378229fc7fed3071ee872eb31ee Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 8 Aug 2025 13:37:47 +0200 Subject: [PATCH 15/41] some changes --- initialize_db.py | 2 +- models/database.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 models/database.py diff --git a/initialize_db.py b/initialize_db.py index cb58e53..7253a20 100644 --- a/initialize_db.py +++ b/initialize_db.py @@ -4,7 +4,7 @@ logger = logging.getLogger(__name__) try: - from models.db_utils import init_db + from models.database import init_db except ModuleNotFoundError as exc: logger.error( "Modulo mancante. 
Installa le dipendenze con 'pip install -r requirements.txt'" diff --git a/models/database.py b/models/database.py new file mode 100644 index 0000000..492dc55 --- /dev/null +++ b/models/database.py @@ -0,0 +1,92 @@ +import logging +import threading +import configparser +from pathlib import Path +from sqlalchemy import create_engine, text +from sqlalchemy.orm import declarative_base, sessionmaker + +logger = logging.getLogger(__name__) + + +class DatabaseEngine: + """Singleton thread-safe che fornisce l'engine del database e le sessioni.""" + + _instance = None + _instance_lock = threading.Lock() + + def __init__(self) -> None: + self._engine = None + self._session_factory = None + self._engine_lock = threading.Lock() + self._session_lock = threading.Lock() + + @classmethod + def instance(cls) -> "DatabaseEngine": + if cls._instance is None: + with cls._instance_lock: + if cls._instance is None: + cls._instance = cls() + return cls._instance + + def _load_config(self): + config = configparser.ConfigParser() + root = Path(__file__).resolve().parent.parent + cfg_path = root / "db.config" + if not cfg_path.exists(): + cfg_path = root / "db.config.example" + config.read(cfg_path) + return config["mysql"] + + def _ensure_database(self, cfg): + """Crea il database di destinazione se non esiste già.""" + root_url = ( + f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}" + ) + engine = create_engine(root_url, isolation_level="AUTOCOMMIT") + try: + with engine.begin() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{cfg['database']}`")) + except Exception as exc: + logger.exception( + "Impossibile creare il database '%s' sull'host '%s' con l'utente '%s'", + cfg.get("database"), + cfg.get("host"), + cfg.get("user"), + ) + raise RuntimeError( + f"Impossibile creare il database '{cfg.get('database')}' sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. " + "Il server del database potrebbe non essere raggiungibile, le credenziali potrebbero essere errate o l'utente potrebbe non avere privilegi sufficienti." + ) from exc + + def get_engine(self): + if self._engine is None: + with self._engine_lock: + if self._engine is None: + cfg = self._load_config() + self._ensure_database(cfg) + url = ( + f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}/{cfg['database']}" + ) + self._engine = create_engine( + url, + pool_pre_ping=True, + pool_recycle=3600, + ) + return self._engine + + def get_session(self): + if self._session_factory is None: + with self._session_lock: + if self._session_factory is None: + engine = self.get_engine() + self._session_factory = sessionmaker(bind=engine) + return self._session_factory() + + def init_db(self): + engine = self.get_engine() + import models.orm_models # noqa: F401 + Base.metadata.create_all(engine) + + +Base = declarative_base() + From 7cb5ae3a7cc6a74de28284f13ddd88d6fe6481dd Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 8 Aug 2025 13:38:49 +0200 Subject: [PATCH 16/41] db fixed --- initialize_db.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/initialize_db.py b/initialize_db.py index 7253a20..2ab60d0 100644 --- a/initialize_db.py +++ b/initialize_db.py @@ -4,7 +4,7 @@ logger = logging.getLogger(__name__) try: - from models.database import init_db + from models.database import DatabaseEngine except ModuleNotFoundError as exc: logger.error( "Modulo mancante. 
Installa le dipendenze con 'pip install -r requirements.txt'" @@ -14,10 +14,10 @@ if __name__ == '__main__': setup_logging() - logger.info("Inizializzazione del database in corso...") + logger.info("Inizializzazione del database...") try: - init_db() + DatabaseEngine.instance().init_db() logger.info("Database inizializzato con successo!") except Exception as e: logger.error(f"Errore durante l'inizializzazione del database: {e}") - logger.exception("Traceback dettagliato:") + logger.exception("Traccia dettagliata:") From 885b334a9e3cc642ab181d28ffb11c3f3c6bda5d Mon Sep 17 00:00:00 2001 From: oniichan Date: Fri, 8 Aug 2025 14:08:23 +0200 Subject: [PATCH 17/41] clean code --- controllers/openai_client.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/controllers/openai_client.py b/controllers/openai_client.py index 92d5d96..b716648 100644 --- a/controllers/openai_client.py +++ b/controllers/openai_client.py @@ -10,15 +10,6 @@ DEFAULT_MODEL = "gpt-4o" DEFAULT_ENDPOINT = "https://api.openai.com/v1" -# Modelli disponibili per diversi provider (esempio) -OPENAI_MODELS = ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] -ANTHROPIC_MODELS = [ - "claude-3-opus-20240229", - "claude-3-sonnet-20240229", - "claude-3-haiku-20240307", -] -# Aggiungi altri provider e modelli se necessario -# XAI_MODELS = ["grok-1"] def get_openai_client(api_key: str, base_url: str | None = None): @@ -41,11 +32,6 @@ def get_available_models_for_endpoint( provider_name: str, endpoint_url: str | None = None, api_key: str | None = None ): """Restituisce una lista di modelli disponibili basata sul provider o sull'endpoint.""" - - if provider_name == "OpenAI": - return OPENAI_MODELS - if provider_name == "Anthropic": - return ANTHROPIC_MODELS # Aggiungi altri provider predefiniti qui # elif provider_name == "XAI": # return XAI_MODELS From 83a3e5a31ae19ed733455573075b91d2078537e4 Mon Sep 17 00:00:00 2001 From: oniichan Date: Mon, 11 Aug 2025 12:21:55 +0200 Subject: [PATCH 18/41] improved the stucture , added some test file --- app.py | 2 +- controllers/__init__.py | 76 ++-- controllers/api_preset_controller.py | 18 +- controllers/question_controller.py | 206 +++++----- controllers/question_set_controller.py | 292 +++---------- controllers/result_controller.py | 100 +++++ controllers/startup_controller.py | 39 +- controllers/test_controller.py | 519 +++++++++--------------- initialize_db.py | 2 +- models/api_preset.py | 31 +- models/question.py | 93 ++++- models/question_set.py | 187 ++++++++- models/test_result.py | 155 +++++-- tests/sample_data/question_sets.csv | 5 + tests/sample_data/question_sets.json | 8 + tests/sample_data/questions.csv | 4 + tests/sample_data/questions.json | 5 + tests/sample_data/test_results.csv | 3 + tests/sample_data/test_results.json | 4 + tests/test_api_preset_controller.py | 14 +- tests/test_data_format_utils.py | 39 ++ tests/test_evaluate_answer.py | 42 +- tests/test_import_results.py | 38 ++ tests/test_openai_controllers.py | 45 +- tests/test_question_controller.py | 189 ++++++++- tests/test_question_import.py | 65 +++ tests/test_question_set_controller.py | 31 ++ tests/test_question_set_import.py | 48 +++ tests/test_question_set_importer.py | 97 ++--- tests/test_set_helpers.py | 76 ++++ utils/cache.py | 8 +- utils/data_format_utils.py | 58 +++ utils/file_reader_utils.py | 218 ++++++++++ {controllers => utils}/openai_client.py | 0 utils/startup_utils.py | 31 ++ views/__init__.py | 2 +- views/api_configurazione.py | 2 +- views/esecuzione_test.py | 23 +- views/gestione_domande.py 
| 117 +++--- views/gestione_set.py | 134 ++---- views/session_state.py | 2 +- views/set_helpers.py | 119 +++--- views/state_models.py | 4 +- views/visualizza_risultati.py | 123 ++---- 44 files changed, 2063 insertions(+), 1211 deletions(-) create mode 100644 controllers/result_controller.py create mode 100644 tests/sample_data/question_sets.csv create mode 100644 tests/sample_data/question_sets.json create mode 100644 tests/sample_data/questions.csv create mode 100644 tests/sample_data/questions.json create mode 100644 tests/sample_data/test_results.csv create mode 100644 tests/sample_data/test_results.json create mode 100644 tests/test_data_format_utils.py create mode 100644 tests/test_import_results.py create mode 100644 tests/test_question_import.py create mode 100644 tests/test_question_set_import.py create mode 100644 tests/test_set_helpers.py create mode 100644 utils/data_format_utils.py create mode 100644 utils/file_reader_utils.py rename {controllers => utils}/openai_client.py (100%) create mode 100644 utils/startup_utils.py diff --git a/app.py b/app.py index 9f6c543..a9cfade 100644 --- a/app.py +++ b/app.py @@ -12,7 +12,7 @@ ) from views.session_state import initialize_session_state from views.style_utils import add_global_styles -from controllers.startup_controller import setup_logging +from utils.startup_utils import setup_logging logger = logging.getLogger(__name__) diff --git a/controllers/__init__.py b/controllers/__init__.py index 930c7fe..527141e 100644 --- a/controllers/__init__.py +++ b/controllers/__init__.py @@ -1,6 +1,6 @@ -"""Expose controller utilities for external use.""" +"""Esporta le utilità dei controller per uso esterno.""" -# API preset management +# Gestione dei preset API import logging from .api_preset_controller import ( @@ -8,84 +8,106 @@ refresh_api_presets, list_presets, get_preset_by_id, + get_preset_by_name, validate_preset, save_preset, delete_preset, test_api_connection, ) -# Question CRUD +# Operazioni CRUD sulle domande from .question_controller import ( load_questions, refresh_questions, add_question, update_question, delete_question, - filter_questions_by_category, - import_questions_from_file, + get_filtered_questions, + save_question_action, + delete_question_action, + import_questions_action, + get_question_text, + get_question_category, ) -# Question set management +# Gestione dei set di domande from .question_set_controller import ( load_sets, refresh_question_sets, create_set, update_set, delete_set, - import_sets_from_file, + prepare_sets_for_view, ) -# Results and evaluation utilities +# Risultati e utilità di valutazione from .test_controller import ( load_results, refresh_results, - add_result, - save_results, - import_results_from_file, - calculate_statistics, + import_results_action, + generate_answer, evaluate_answer, - execute_llm_test, + run_test, ) -# Import helpers +from .result_controller import ( + get_results, + list_set_names, + list_model_names, + prepare_select_options, +) + +from models.test_result import TestResult + +calculate_statistics = TestResult.calculate_statistics + +# Funzioni di avvio from .startup_controller import get_initial_state logger = logging.getLogger(__name__) __all__ = [ - # API preset + # Preset API "load_presets", "refresh_api_presets", "list_presets", "get_preset_by_id", + "get_preset_by_name", "validate_preset", "save_preset", "delete_preset", "test_api_connection", - # Questions + # Domande "load_questions", "refresh_questions", "add_question", "update_question", "delete_question", - 
"filter_questions_by_category", - "import_questions_from_file", - # Question sets + "get_filtered_questions", + "save_question_action", + "delete_question_action", + "import_questions_action", + "get_question_text", + "get_question_category", + # Set di domande "load_sets", "refresh_question_sets", "create_set", "update_set", "delete_set", - "import_sets_from_file", - # Test results + "prepare_sets_for_view", + # Risultati dei test "load_results", "refresh_results", - "add_result", - "save_results", - "import_results_from_file", - "calculate_statistics", + "import_results_action", + "generate_answer", "evaluate_answer", - "execute_llm_test", - # Startup + "calculate_statistics", + "run_test", + "get_results", + "list_set_names", + "list_model_names", + "prepare_select_options", + # Avvio "get_initial_state", ] diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py index 3475a7f..5aa2f64 100644 --- a/controllers/api_preset_controller.py +++ b/controllers/api_preset_controller.py @@ -14,7 +14,7 @@ ) from openai import APIConnectionError, APIStatusError, RateLimitError -from . import openai_client +from utils import openai_client logger = logging.getLogger(__name__) @@ -47,6 +47,18 @@ def get_preset_by_id( return match.iloc[0].to_dict() +def get_preset_by_name( + name: str, df: pd.DataFrame | None = None +) -> Optional[dict]: + """Recupera un singolo preset dato il suo nome.""" + if df is None: + df = load_presets() + match = df[df["name"] == name] + if match.empty: + return None + return match.iloc[0].to_dict() + + def validate_preset(data: dict, preset_id: Optional[str] = None) -> Tuple[bool, str]: """Valida i dati di un preset prima del salvataggio.""" name = data.get("name", "").strip() @@ -72,6 +84,7 @@ def save_preset( df = load_presets() preset_data = { "name": data.get("name"), + "provider_name": data.get("provider_name", ""), "endpoint": data.get("endpoint"), "api_key": data.get("api_key"), "model": data.get("model"), @@ -90,7 +103,8 @@ def save_preset( df = pd.concat([df, pd.DataFrame([preset_data])], ignore_index=True) success_message = f"Preset '{preset_data['name']}' creato con successo!" 
- APIPreset.save_df(df) + presets = [APIPreset(**row) for row in df.to_dict(orient="records")] + APIPreset.save(presets) updated_df = refresh_api_presets() return True, success_message, updated_df diff --git a/controllers/question_controller.py b/controllers/question_controller.py index 4465da9..6812c64 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,11 +1,7 @@ """Controller per la gestione delle domande senza layer di service.""" import logging -from typing import Optional, Tuple, List - -import json -import os -import uuid +from typing import Optional, Tuple, List, Dict, Any import pandas as pd @@ -75,114 +71,94 @@ def delete_question(question_id: str) -> None: refresh_questions() -def filter_questions_by_category( - category: Optional[str] = None, -) -> Tuple[pd.DataFrame, List[str]]: - """Restituisce le domande filtrate per categoria e tutte le categorie.""" - - df = load_questions() - - if df.empty: - return df, [] - +def get_filtered_questions(category: Optional[str] = None) -> Tuple[pd.DataFrame, List[str]]: + """Restituisce il ``DataFrame`` filtrato e l'elenco delle categorie.""" + return Question.filter_by_category(category) + + +def save_question_action( + question_id: str, edited_question: str, edited_answer: str, edited_category: str +) -> dict: + """Aggiorna una domanda e restituisce un dizionario con l'esito. + + Restituisce + ----------- + dict + ``{"success": bool, "questions_df": DataFrame | None}`` + + In caso di successo viene anche ricaricata la lista delle domande. + Eventuali errori sollevati da ``update_question`` vengono propagati. + """ + success = update_question( + question_id, + domanda=edited_question, + risposta_attesa=edited_answer, + categoria=edited_category, + ) + questions = refresh_questions() if success else None + return {"success": success, "questions_df": questions} + + +def delete_question_action(question_id: str) -> pd.DataFrame: + """Elimina una domanda e restituisce il ``DataFrame`` aggiornato.""" + delete_question(question_id) + questions = refresh_questions() + return questions + + +def import_questions_action(uploaded_file) -> Dict[str, Any]: + """Importa domande da file e restituisce i risultati dell'operazione. + + Parametri + --------- + uploaded_file: file-like + Il file caricato dall'utente. 
+ + Restituisce + ----------- + dict + ``{"questions_df": DataFrame, "imported_count": int, "warnings": list[str]}`` + """ + + if uploaded_file is None: + raise ValueError("Nessun file caricato.") + + result = Question.import_from_file(uploaded_file) + if not result["success"]: + message = "; ".join(result.get("warnings", [])) + raise ValueError(message) + + questions = refresh_questions() + return { + "questions_df": questions, + "imported_count": result["imported_count"], + "warnings": result.get("warnings", []), + } + + +def get_question_text(question_id: str, questions_df: Optional[pd.DataFrame] = None) -> str: + """Ritorna il testo della domanda dato il suo ID, ricaricando la cache se necessario.""" + df = questions_df if questions_df is not None else load_questions() + if "domanda" not in df.columns: + df = refresh_questions() + if "domanda" not in df.columns: + return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" + row = df[df["id"] == str(question_id)] + if not row.empty: + return row.iloc[0]["domanda"] + return f"ID Domanda: {question_id} (non trovata)" + + +def get_question_category( + question_id: str, questions_df: Optional[pd.DataFrame] = None +) -> str: + """Ritorna la categoria della domanda dato il suo ID, ricaricando la cache se necessario.""" + df = questions_df if questions_df is not None else load_questions() if "categoria" not in df.columns: - df["categoria"] = "" - else: - df["categoria"] = df["categoria"].fillna("N/A") - - categories = sorted(list(df["categoria"].astype(str).unique())) - - if category: - filtered_df = df[df["categoria"] == category] - else: - filtered_df = df - - return filtered_df, categories - - -def import_questions_from_file(file) -> Tuple[bool, str]: - """Importa domande da un file CSV o JSON.""" - - def _import(file) -> Tuple[bool, str]: - try: - file_extension = os.path.splitext(file.name)[1].lower() - imported_df = None - - if file_extension == ".csv": - try: - imported_df = pd.read_csv(file) - except Exception: - return False, "Il formato del file csv non è valido" - elif file_extension == ".json": - try: - data = json.load(file) - except Exception: - return False, "Il formato del file json non è valido" - if isinstance(data, list): - imported_df = pd.DataFrame(data) - elif ( - isinstance(data, dict) - and "questions" in data - and isinstance(data["questions"], list) - ): - imported_df = pd.DataFrame(data["questions"]) - else: - return False, ( - "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." - ) - else: - return False, "Formato file non supportato. Caricare un file CSV o JSON." - - if imported_df is None or imported_df.empty: - return False, "Il file importato è vuoto o non contiene dati validi." 
- - if "question" in imported_df.columns and "domanda" not in imported_df.columns: - imported_df.rename(columns={"question": "domanda"}, inplace=True) - if ( - "expected_answer" in imported_df.columns - and "risposta_attesa" not in imported_df.columns - ): - imported_df.rename( - columns={"expected_answer": "risposta_attesa"}, inplace=True - ) - - required_columns = ["domanda", "risposta_attesa"] - if not all(col in imported_df.columns for col in required_columns): - return ( - False, - f"Il file importato deve contenere le colonne '{required_columns[0]}' " - f"e '{required_columns[1]}'.", - ) - - if "id" not in imported_df.columns: - imported_df["id"] = [str(uuid.uuid4()) for _ in range(len(imported_df))] - else: - imported_df["id"] = imported_df["id"].astype(str) - - if "categoria" not in imported_df.columns: - imported_df["categoria"] = "" - else: - imported_df["categoria"] = imported_df["categoria"].astype(str).fillna("") - - imported_df["domanda"] = imported_df["domanda"].astype(str).fillna("") - imported_df["risposta_attesa"] = ( - imported_df["risposta_attesa"].astype(str).fillna("") - ) - - final_imported_df = imported_df[["id", "domanda", "risposta_attesa", "categoria"]] - - added_count = 0 - for _, row in final_imported_df.iterrows(): - Question.add( - row["domanda"], - row["risposta_attesa"], - row["categoria"], - question_id=row["id"], - ) - added_count += 1 - refresh_questions() - return True, f"Importate con successo {added_count} domande." - except Exception as e: # pragma: no cover - errors should be rare and simple - return False, f"Errore durante l'importazione delle domande: {str(e)}" - - return _import(file) + df = refresh_questions() + if "categoria" not in df.columns: + return f"ID Domanda: {question_id} (colonna 'categoria' mancante)" + row = df[df["id"] == str(question_id)] + if not row.empty: + return row.iloc[0]["categoria"] + return f"ID Domanda: {question_id} (non trovata)" diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 0eaa453..2a96173 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -1,22 +1,20 @@ import logging -import os -import json -from typing import List, Optional, Any, Dict, Tuple +from typing import List, Optional, Any, Dict import pandas as pd -from .question_controller import add_question_if_not_exists, load_questions -from models.question_set import QuestionSet +from models.question_set import QuestionSet, PersistSetsResult from utils.cache import ( + get_questions as _get_questions, get_question_sets as _get_question_sets, refresh_question_sets as _refresh_question_sets, ) - +from utils.data_format_utils import ( + build_questions_detail, + format_questions_for_view, +) logger = logging.getLogger(__name__) -REQUIRED_CSV_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] - - def load_sets() -> pd.DataFrame: """Restituisce tutti i set di domande utilizzando la cache.""" return _get_question_sets() @@ -50,241 +48,55 @@ def delete_set(set_id: str) -> None: refresh_question_sets() -def parse_input(uploaded_file) -> List[Dict[str, Any]]: - """Analizza un file CSV o JSON in una lista di dizionari di set di domande.""" - file_extension = os.path.splitext(uploaded_file.name)[1].lower() - - if file_extension == ".csv": - try: - df = pd.read_csv(uploaded_file) - except Exception as e: # pragma: no cover - handled as generic csv error - raise ValueError("Il formato del file csv non è valido") from e - - missing = [c for c in 
REQUIRED_CSV_COLUMNS if c not in df.columns] - if missing: - raise ValueError( - "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_CSV_COLUMNS) - ) - - sets_dict: Dict[str, List[Dict[str, str]]] = {} - for _, row in df.iterrows(): - name = str(row["name"]).strip() - if not name: - continue - question = { - "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", - "domanda": str(row["domanda"]).strip() - if not pd.isna(row["domanda"]) - else "", - "risposta_attesa": str(row["risposta_attesa"]).strip() - if not pd.isna(row["risposta_attesa"]) - else "", - "categoria": str(row["categoria"]).strip() - if not pd.isna(row["categoria"]) - else "", - } - sets_dict.setdefault(name, []).append(question) - return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] - - try: - string_data = uploaded_file.getvalue().decode("utf-8") - data = json.loads(string_data) - except Exception as e: # pragma: no cover - handled as generic json error - raise ValueError("Il formato del file json non è valido") from e - - if not isinstance(data, list): - raise ValueError("Il formato del file json non è valido") - return data - - -def resolve_question_ids( - questions_in_set_data: List[Any], - current_questions: pd.DataFrame, -) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]: - """Risolve gli identificatori delle domande per un set di domande.""" - warnings: List[str] = [] - question_ids: List[str] = [] - new_added = 0 - existing_found = 0 - - for q_idx, q_data in enumerate(questions_in_set_data): - if isinstance(q_data, dict): - q_id = str(q_data.get("id", "")) - q_text = q_data.get("domanda", "") - q_answer = q_data.get("risposta_attesa", "") - q_category = q_data.get("categoria", "") - else: - q_id = str(q_data) - q_text = "" - q_answer = "" - q_category = "" - - if not q_id: - warnings.append(f"Domanda #{q_idx + 1} senza ID (saltata).") - continue - - if q_text and q_answer: - if q_id in current_questions["id"].astype(str).values: - existing_found += 1 - question_ids.append(q_id) - else: - was_added = add_question_if_not_exists( - question_id=q_id, - domanda=q_text, - risposta_attesa=q_answer, - categoria=q_category, - ) - if was_added: - new_added += 1 - question_ids.append(q_id) - new_row = pd.DataFrame( - { - "id": [q_id], - "domanda": [q_text], - "risposta_attesa": [q_answer], - "categoria": [q_category], - } - ) - current_questions = pd.concat( - [current_questions, new_row], ignore_index=True - ) - else: - existing_found += 1 - question_ids.append(q_id) - continue - - if q_id in current_questions["id"].astype(str).values: - existing_found += 1 - question_ids.append(q_id) - else: - warnings.append( - f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata." - ) - - return question_ids, current_questions, new_added, existing_found, warnings - - -def persist_sets( - sets_data: List[Dict[str, Any]], - current_questions: pd.DataFrame, - current_sets: pd.DataFrame, +def prepare_sets_for_view( + selected_categories: Optional[List[str]] = None, ) -> Dict[str, Any]: - """Crea set di domande dai dati analizzati.""" - sets_imported_count = 0 - new_questions_added_count = 0 - existing_questions_found_count = 0 - warnings: List[str] = [] - - for set_idx, set_data in enumerate(sets_data): - if not isinstance(set_data, dict): - warnings.append( - f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)." 
- ) - continue - - set_name = set_data.get("name") - questions_in_set_data = set_data.get("questions", []) - - if not set_name or not isinstance(set_name, str) or not set_name.strip(): - warnings.append( - f"Set #{set_idx + 1} con nome mancante o non valido (saltato)." - ) - continue - - if not isinstance(questions_in_set_data, list): - warnings.append( - f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." - ) - continue - - if set_name in current_sets.get("name", pd.Series([])).values: - warnings.append( - f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." - ) - continue + """Prepara le informazioni dei set e delle domande per la vista.""" + try: + questions_df_raw = _get_questions() + sets_df = _get_question_sets() - question_ids, current_questions, added, existing, q_warnings = resolve_question_ids( - questions_in_set_data, current_questions + questions_df, question_map, categories = format_questions_for_view( + questions_df_raw ) - warnings.extend(q_warnings) - if question_ids or len(questions_in_set_data) == 0: - try: - QuestionSet.create(set_name, question_ids) - sets_imported_count += 1 - except Exception as e: # pragma: no cover - protective - warnings.append( - f"Errore durante la creazione del set '{set_name}': {e}" - ) - else: - warnings.append( - f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." + if sets_df.empty: + sets_df = pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] ) - - new_questions_added_count += added - existing_questions_found_count += existing - - sets_df = refresh_question_sets() - - success = sets_imported_count > 0 - success_message = "" - if success: - parts = [] - if sets_imported_count > 0: - parts.append(f"{sets_imported_count} set importati") - if new_questions_added_count > 0: - parts.append(f"{new_questions_added_count} nuove domande aggiunte") - if existing_questions_found_count > 0: - parts.append( - f"{existing_questions_found_count} domande esistenti referenziate" + else: + sets_df = sets_df.copy() + sets_df["questions_detail"] = sets_df["questions"].apply( + lambda q_ids: build_questions_detail(question_map, q_ids) ) - success_message = ". ".join(parts) + "." - - return { - "sets_imported_count": sets_imported_count, - "new_questions_added_count": new_questions_added_count, - "existing_questions_found_count": existing_questions_found_count, - "questions_df": current_questions, - "sets_df": sets_df, - "warnings": warnings, - "success": success, - "success_message": success_message, - } - - -def import_sets_from_file(uploaded_file) -> Dict[str, Any]: - """Importa uno o più set di domande da un file JSON o CSV.""" - result: Dict[str, Any] = { - "success": False, - "success_message": "", - "error_message": "", - "questions_df": None, - "sets_df": None, - "warnings": [], - } - - if uploaded_file is None: - result["error_message"] = "Nessun file fornito per l'importazione." 
- return result - - try: - data = parse_input(uploaded_file) - current_questions = load_questions() - current_sets = load_sets() - persist_result = persist_sets(data, current_questions, current_sets) - - result.update( - { - "success": persist_result["success"], - "success_message": persist_result["success_message"], - "questions_df": persist_result["questions_df"], - "sets_df": persist_result["sets_df"], - "warnings": persist_result["warnings"], - } - ) - except ValueError as e: - result["error_message"] = str(e) - except Exception as e: # pragma: no cover - general protection - result["error_message"] = f"Errore imprevisto durante l'importazione: {str(e)}" - return result + filtered_sets_df = sets_df + if selected_categories: + def has_categories(details: List[Dict[str, Any]]) -> bool: + categories_in_set = {d.get("categoria") for d in details} + return all(cat in categories_in_set for cat in selected_categories) + + filtered_sets_df = sets_df[ + sets_df["questions_detail"].apply(has_categories) + ] + + return { + "questions_df": questions_df, + "sets_df": filtered_sets_df, + "raw_sets_df": sets_df, + "categories": categories, + } + except Exception as exc: # pragma: no cover - error path + logger.error("Errore nella preparazione dei set: %s", exc) + return { + "questions_df": pd.DataFrame( + columns=["id", "domanda", "risposta_attesa", "categoria"] + ), + "sets_df": pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] + ), + "raw_sets_df": pd.DataFrame( + columns=["id", "name", "questions", "questions_detail"] + ), + "categories": [], + } diff --git a/controllers/result_controller.py b/controllers/result_controller.py new file mode 100644 index 0000000..b650998 --- /dev/null +++ b/controllers/result_controller.py @@ -0,0 +1,100 @@ +import logging +from typing import Dict + +import pandas as pd + +from .test_controller import load_results +from .question_set_controller import load_sets +from .api_preset_controller import load_presets + +logger = logging.getLogger(__name__) + + +def get_results(filter_set: str | None, filter_model: str | None) -> pd.DataFrame: + """Carica i risultati e applica eventuali filtri per set e modello LLM.""" + df = load_results() + + if filter_set: + sets_df = load_sets() + set_ids = ( + sets_df[sets_df["name"] == filter_set]["id"].astype(str).tolist() + if not sets_df.empty + else [] + ) + df = df[df["set_id"].astype(str).isin(set_ids)] + + if filter_model: + presets_df = load_presets() + preset_models: Dict[str, str] = ( + presets_df.set_index("name")["model"].to_dict() + if not presets_df.empty + else {} + ) + + def matches_model(res: dict) -> bool: + model = res.get("generation_llm") + if not model: + preset_name = res.get("generation_preset") + model = preset_models.get(preset_name) if preset_name else None + return model == filter_model + + df = df[df["results"].apply(matches_model)] + + return df + + +def list_set_names(results_df: pd.DataFrame, question_sets_df: pd.DataFrame) -> list[str]: + """Elenca i nomi dei set disponibili nei risultati.""" + if results_df.empty or question_sets_df.empty: + return [] + set_name_map = { + str(row["id"]): row["name"] + for row in question_sets_df.to_dict("records") + } + names = {set_name_map.get(str(sid), "Set Sconosciuto") for sid in results_df["set_id"]} + return sorted(names) + + +def list_model_names(results_df: pd.DataFrame) -> list[str]: + """Elenca i nomi dei modelli LLM presenti nei risultati.""" + if results_df.empty: + return [] + presets_df = load_presets() + preset_models: Dict[str, 
str] = ( + presets_df.set_index("name")["model"].to_dict() if not presets_df.empty else {} + ) + models = set() + for res in results_df["results"]: + model = res.get("generation_llm") + if not model and res.get("generation_preset"): + model = preset_models.get(res.get("generation_preset")) + if model: + models.add(model) + return sorted(models) + + +def prepare_select_options( + results_df: pd.DataFrame, question_sets_df: pd.DataFrame +) -> Dict[str, str]: + """Prepara le opzioni del selectbox dei risultati.""" + if results_df.empty: + return {} + set_name_map = { + str(row["id"]): row["name"] + for row in question_sets_df.to_dict("records") + } + processed = [] + for _, row in results_df.iterrows(): + result_data = row["results"] + set_name = set_name_map.get(str(row["set_id"]), "Set Sconosciuto") + avg_score = result_data.get("avg_score", 0) + method = result_data.get("method", "N/A") + method_icon = "🤖" if method == "LLM" else "📊" + processed.append( + { + "id": row["id"], + "display_name": f"{row['timestamp']} - {method_icon} {set_name} (Avg: {avg_score:.2f}%) - {method}", + } + ) + processed.sort(key=lambda x: x["display_name"].split(" - ")[0], reverse=True) + return {p["id"]: p["display_name"] for p in processed} diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py index a40ba9f..5d98551 100644 --- a/controllers/startup_controller.py +++ b/controllers/startup_controller.py @@ -1,33 +1,22 @@ import logging -import os -from models.database import DatabaseEngine -from controllers.question_controller import load_questions -from controllers.question_set_controller import load_sets -from controllers.test_controller import load_results -from controllers.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT +from utils.cache import get_questions, get_question_sets, get_results +from utils.startup_utils import ( + setup_logging, + initialize_database, + load_default_config, +) logger = logging.getLogger(__name__) -def setup_logging(level: int = logging.INFO) -> None: - """Configura il logger radice con un formato di base.""" - logging.basicConfig( - level=level, - format="%(asctime)s - %(levelname)s - %(message)s", - ) - - def get_initial_state() -> dict: - """Inizializza il database e restituisce lo stato predefinito dell'applicazione.""" - DatabaseEngine.instance().init_db() - return { - "questions": load_questions(), - "question_sets": load_sets(), - "results": load_results(), - "api_key": os.environ.get("OPENAI_API_KEY", ""), - "endpoint": DEFAULT_ENDPOINT, - "model": DEFAULT_MODEL, - "temperature": 0.0, - "max_tokens": 1000, + """Restituisce lo stato predefinito dell'applicazione.""" + initialize_database() + defaults = load_default_config() + cached_data = { + "questions": get_questions(), + "question_sets": get_question_sets(), + "results": get_results(), } + return {**cached_data, **defaults} diff --git a/controllers/test_controller.py b/controllers/test_controller.py index 6b7c0db..b2a098c 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -2,415 +2,286 @@ from __future__ import annotations -import logging import json -import uuid +import logging from datetime import datetime -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Any import pandas as pd from openai import APIConnectionError, APIStatusError, RateLimitError -from models.question import Question from models.test_result import TestResult -from . 
import openai_client -from utils.cache import ( - get_results as _get_results, - refresh_results as _refresh_results, -) +from models.question import Question +from utils import openai_client + +DEFAULT_MODEL = openai_client.DEFAULT_MODEL logger = logging.getLogger(__name__) def load_results() -> pd.DataFrame: """Restituisce i risultati dei test utilizzando la cache.""" - - return _get_results() + return TestResult.load_all_df() def refresh_results() -> pd.DataFrame: """Svuota e ricarica la cache dei risultati dei test.""" + return TestResult.refresh_cache() - return _refresh_results() - - -def add_result(set_id: str, results_data: Dict) -> str: - """Aggiunge un nuovo risultato di test e aggiorna la cache.""" - - rid = TestResult.add(set_id, results_data) - refresh_results() - return rid - - -def save_results(df: pd.DataFrame) -> None: - """Salva il DataFrame dei risultati e aggiorna la cache.""" - - TestResult.save_df(df) - refresh_results() - - -def import_results_from_file(file) -> Tuple[bool, str]: - """Importa risultati di test da un file JSON.""" - try: - data = json.load(file) - if isinstance(data, dict): - data = [data] - if not isinstance(data, list): - return False, "Il file JSON deve contenere un oggetto o una lista di risultati." - - results_df = load_results() - added_count = 0 - - for item in data: - if not isinstance(item, dict): - continue - - result_id = str(item.get("id", uuid.uuid4())) - if result_id in results_df["id"].astype(str).values: - continue - - set_id = str(item.get("set_id", "")) - timestamp = str( - item.get( - "timestamp", - datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - ) - ) - results_content = item.get("results", {}) - - new_row = { - "id": result_id, - "set_id": set_id, - "timestamp": timestamp, - "results": results_content if isinstance(results_content, dict) else {}, - } +def import_results_action(uploaded_file) -> Tuple[pd.DataFrame, str]: + """Importa risultati da ``uploaded_file`` e restituisce il DataFrame aggiornato. - results_df = pd.concat( - [results_df, pd.DataFrame([new_row])], ignore_index=True - ) - added_count += 1 + Parametri + --------- + uploaded_file: Oggetto tipo file caricato contenente i risultati. - if added_count > 0: - save_results(results_df) - message = f"Importati {added_count} risultati." - else: - message = "Nessun nuovo risultato importato." + Restituisce + ----------- + Tuple[pd.DataFrame, str] + Il DataFrame aggiornato dei risultati e un messaggio descrittivo. - return True, message - except Exception as e: # noqa: BLE001 - return False, f"Errore durante l'importazione dei risultati: {str(e)}" + Eccezioni + --------- + ValueError + Se il file non è presente o contiene dati non validi. 
+ """ + if uploaded_file is None: + raise ValueError("Nessun file caricato.") -def calculate_statistics(questions_results: Dict[str, Dict]) -> Dict: - """Calcola statistiche dai risultati grezzi delle domande.""" + success, message = TestResult.import_from_file(uploaded_file) + if not success: + raise ValueError(message) - if not questions_results: - return { - "avg_score": 0, - "per_question_scores": [], - "radar_metrics": { - "similarity": 0, - "correctness": 0, - "completeness": 0, - }, - } + results = load_results() + return results, message - per_question_scores: List[Dict] = [] - radar_sums = {"similarity": 0, "correctness": 0, "completeness": 0} - for qid, qdata in questions_results.items(): - evaluation = qdata.get("evaluation", {}) - score = evaluation.get("score", 0) - per_question_scores.append( - {"question": qdata.get("question", f"Domanda {qid}"), "score": score} - ) - for metric in radar_sums.keys(): - radar_sums[metric] += evaluation.get(metric, 0) +def generate_answer(question: str, client_config: Dict[str, Any]) -> str: + """Genera una risposta per ``question`` utilizzando la configurazione LLM fornita. - count = len(per_question_scores) - avg_score = ( - sum(item["score"] for item in per_question_scores) / count if count > 0 else 0 - ) - radar_metrics = { - metric: radar_sums[metric] / count if count > 0 else 0 - for metric in radar_sums - } - - return { - "avg_score": avg_score, - "per_question_scores": per_question_scores, - "radar_metrics": radar_metrics, - } - - -def evaluate_answer( - question: str, - expected_answer: str, - actual_answer: str, - client_config: dict, -): - """Valuta una risposta utilizzando un LLM specificato tramite client_config.""" + Restituisce solo la risposta generata. In caso di errore viene sollevata + un'eccezione. + """ client = openai_client.get_openai_client( api_key=client_config.get("api_key"), base_url=client_config.get("endpoint"), ) if not client: - return { - "score": 0, - "explanation": "Errore: Client API per la valutazione non configurato.", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } + logger.error("Client API per la generazione risposte non configurato.") + raise ValueError("Client API non configurato") - prompt = f""" - Sei un valutatore esperto che valuta la qualità delle risposte alle domande. - Domanda: {question} - Risposta Attesa: {expected_answer} - Risposta Effettiva: {actual_answer} - - Valuta la risposta effettiva rispetto alla risposta attesa in base a: - 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? - 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? - 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? - Calcola un punteggio complessivo (0-100) basato su queste metriche. - Fornisci una breve spiegazione della tua valutazione (max 100 parole). 
- Formatta la tua risposta come un oggetto JSON con questi campi: - - score: il punteggio complessivo (numero) - - explanation: la tua spiegazione (stringa) - - similarity: punteggio di somiglianza (numero) - - correctness: punteggio di correttezza (numero) - - completeness: punteggio di completezza (numero) - Esempio di risposta JSON: - {{ - "score": 95, - "explanation": "La risposta è corretta e completa", - "similarity": 90, - "correctness": 100, - "completeness": 95 - }} - """ + if question is None or not isinstance(question, str) or question.strip() == "": + logger.error("La domanda fornita è vuota o non valida.") + raise ValueError("Domanda vuota o non valida") + prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" api_request_details = { - "model": client_config.get("model", openai_client.DEFAULT_MODEL), + "model": client_config.get("model", DEFAULT_MODEL), "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.0), - "max_tokens": client_config.get("max_tokens", 250), - "response_format": {"type": "json_object"}, + "temperature": client_config.get("temperature", 0.7), + "max_tokens": client_config.get("max_tokens", 500), } try: response = client.chat.completions.create(**api_request_details) choices = getattr(response, "choices", None) - if not choices: - logger.error("Risposta API priva di 'choices' validi") - return { - "score": 0, - "explanation": "Errore: risposta API non valida.", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } - content = choices[0].message.content or "{}" - try: - evaluation = json.loads(content) - required_keys = [ - "score", - "explanation", - "similarity", - "correctness", - "completeness", - ] - if not all(key in evaluation for key in required_keys): - logger.warning( - f"Risposta JSON dalla valutazione LLM incompleta: {content}. Verranno usati valori di default." - ) - for key in required_keys: - if key not in evaluation: - evaluation[key] = ( - 0 - if key != "explanation" - else "Valutazione incompleta o formato JSON non corretto." 
- ) - return evaluation - except json.JSONDecodeError: - logger.error( - f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" - ) - return { - "score": 0, - "explanation": f"Errore di decodifica JSON: {content[:100]}...", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } - + if not choices or not choices[0].message.content: + raise RuntimeError("Risposta API non valida") + return choices[0].message.content.strip() except (APIConnectionError, RateLimitError, APIStatusError) as e: - logger.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") - return { - "score": 0, - "explanation": f"Errore API: {type(e).__name__}", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } + logger.error( + f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" + ) + raise RuntimeError(str(e)) from e except Exception as exc: # noqa: BLE001 logger.error( - f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" + f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" ) - return { - "score": 0, - "explanation": f"Errore imprevisto: {type(exc).__name__}", - "similarity": 0, - "correctness": 0, - "completeness": 0, - } + raise RuntimeError(str(exc)) from exc -def generate_example_answer_with_llm( - question: str, client_config: dict -): - """Genera una risposta di esempio per una domanda utilizzando un LLM.""" +def evaluate_answer( + question: str, + expected_answer: str, + actual_answer: str, + client_config: Dict[str, Any], +) -> Dict[str, Any]: + """Valuta ``actual_answer`` rispetto a ``expected_answer`` utilizzando un LLM. + + Restituisce i dati di valutazione come dizionario oppure solleva + un'eccezione in caso di errore. + """ client = openai_client.get_openai_client( api_key=client_config.get("api_key"), base_url=client_config.get("endpoint"), ) if not client: - logger.error("Client API per la generazione risposte non configurato.") - return {"answer": None, "error": "Client API non configurato"} + raise ValueError("Errore: Client API per la valutazione non configurato.") - if question is None or not isinstance(question, str) or question.strip() == "": - logger.error("La domanda fornita è vuota o non valida.") - return {"answer": None, "error": "Domanda vuota o non valida"} - - prompt = f"Rispondi alla seguente domanda in modo conciso e accurato: {question}" + prompt = f""" + Sei un valutatore esperto che valuta la qualità delle risposte alle domande. + Domanda: {question} + Risposta Attesa: {expected_answer} + Risposta Effettiva: {actual_answer} + + Valuta la risposta effettiva rispetto alla risposta attesa in base a: + 1. Somiglianza (0-100): Quanto è semanticamente simile la risposta effettiva a quella attesa? + 2. Correttezza (0-100): Le informazioni nella risposta effettiva sono fattualmente corrette? + 3. Completezza (0-100): La risposta effettiva contiene tutti i punti chiave della risposta attesa? + Calcola un punteggio complessivo (0-100) basato su queste metriche. + Fornisci una breve spiegazione della tua valutazione (max 100 parole). 
+ Formatta la tua risposta come un oggetto JSON con questi campi: + - score: il punteggio complessivo (numero) + - explanation: la tua spiegazione (stringa) + - similarity: punteggio di somiglianza (numero) + - correctness: punteggio di correttezza (numero) + - completeness: punteggio di completezza (numero) + Esempio di risposta JSON: + {{ + "score": 95, + "explanation": "La risposta è corretta e completa", + "similarity": 90, + "correctness": 100, + "completeness": 95 + }} + """ api_request_details = { - "model": client_config.get("model", openai_client.DEFAULT_MODEL), + "model": client_config.get("model", DEFAULT_MODEL), "messages": [{"role": "user", "content": prompt}], - "temperature": client_config.get("temperature", 0.7), - "max_tokens": client_config.get("max_tokens", 500), + "temperature": client_config.get("temperature", 0.0), + "max_tokens": client_config.get("max_tokens", 250), + "response_format": {"type": "json_object"}, } try: response = client.chat.completions.create(**api_request_details) - answer = ( - response.choices[0].message.content.strip() - if response.choices and response.choices[0].message.content - else None - ) - return {"answer": answer} - - except (APIConnectionError, RateLimitError, APIStatusError) as e: + choices = getattr(response, "choices", None) + if not choices or not choices[0].message.content: + logger.error("Risposta API priva di 'choices' validi") + raise RuntimeError("Risposta API non valida.") + content = choices[0].message.content + evaluation = json.loads(content) + required_keys = [ + "score", + "explanation", + "similarity", + "correctness", + "completeness", + ] + if not all(key in evaluation for key in required_keys): + raise ValueError(f"Risposta JSON incompleta: {content}") + return evaluation + except json.JSONDecodeError as e: logger.error( - f"Errore API durante la generazione della risposta di esempio: {type(e).__name__} - {e}" + f"Errore: Impossibile decodificare la risposta JSON dalla valutazione LLM: {content}" ) - return {"answer": None, "error": str(e)} + raise ValueError(f"Errore di decodifica JSON: {content[:100]}...") from e + except (APIConnectionError, RateLimitError, APIStatusError) as e: + logger.error(f"Errore API durante la valutazione: {type(e).__name__} - {e}") + raise RuntimeError(str(e)) from e except Exception as exc: # noqa: BLE001 logger.error( - f"Errore imprevisto durante la generazione della risposta: {type(exc).__name__} - {exc}" + f"Errore imprevisto durante la valutazione: {type(exc).__name__} - {exc}" ) - return {"answer": None, "error": str(exc)} + raise RuntimeError(str(exc)) from exc -def execute_llm_test( +def run_test( set_id: str, set_name: str, question_ids: List[str], gen_preset_config: Dict, eval_preset_config: Dict, ) -> Dict: - """Esegue la generazione e valutazione delle risposte tramite LLM.""" - - questions = [ - {"id": q.id, "question": q.domanda, "expected_answer": q.risposta_attesa} - for q in Question.load_all() - ] - questions_df = pd.DataFrame(questions) - - def get_question_data(qid: str): - row = questions_df[questions_df["id"] == str(qid)] - if row.empty: - return None - question = row.iloc[0].get("question", "") - expected = row.iloc[0].get("expected_answer", "") - if not question or not isinstance(question, str) or question.strip() == "": - return None - if not expected or not isinstance(expected, str) or expected.strip() == "": - expected = "Risposta non disponibile" - return {"question": question, "expected_answer": expected} - - results: Dict = {} - for q_id in question_ids: - 
q_data = get_question_data(q_id) - if not q_data: - continue - generation_output = generate_example_answer_with_llm( - q_data["question"], - client_config=gen_preset_config, - ) - actual_answer = generation_output.get("answer") - - if actual_answer is None: - error_msg = generation_output.get("error", "Generazione fallita") - results[q_id] = { - "question": q_data["question"], - "expected_answer": q_data["expected_answer"], - "actual_answer": error_msg, - "evaluation": {"score": 0, "explanation": error_msg}, + """Esegue un test generando e valutando risposte con LLM.""" + + try: + questions_map = {str(q.id): q for q in Question.load_all()} + results: Dict[str, Dict[str, Any]] = {} + + for q_id in question_ids: + q_obj = questions_map.get(str(q_id)) + if not q_obj: + continue + question = q_obj.domanda or "" + if not question.strip(): + continue + expected = q_obj.risposta_attesa or "Risposta non disponibile" + + try: + actual_answer = generate_answer(question, gen_preset_config) + except Exception as e: # noqa: BLE001 + error_msg = str(e) + evaluation = { + "score": 0, + "explanation": error_msg, + "similarity": 0, + "correctness": 0, + "completeness": 0, + } + actual_answer = error_msg + else: + try: + evaluation = evaluate_answer( + question, expected, actual_answer, eval_preset_config + ) + except Exception as e: # noqa: BLE001 + error_msg = str(e) + evaluation = { + "score": 0, + "explanation": error_msg, + "similarity": 0, + "correctness": 0, + "completeness": 0, + } + + results[str(q_id)] = { + "question": question, + "expected_answer": expected, + "actual_answer": actual_answer, + "evaluation": evaluation, } - continue - evaluation = evaluate_answer( - q_data["question"], - q_data["expected_answer"], - actual_answer, - client_config=eval_preset_config, - ) - results[q_id] = { - "question": q_data["question"], - "expected_answer": q_data["expected_answer"], - "actual_answer": actual_answer, - "evaluation": evaluation, + stats = TestResult.calculate_statistics(results) + result_data = { + "set_name": set_name, + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "avg_score": stats["avg_score"], + "sample_type": "Generata da LLM", + "method": "LLM", + "generation_llm": gen_preset_config.get("model"), + "evaluation_llm": eval_preset_config.get("model"), + "questions": results, + "per_question_scores": stats["per_question_scores"], + "radar_metrics": stats["radar_metrics"], } - if not results: + result_id = TestResult.add_and_refresh(set_id, result_data) + return { + "result_id": result_id, + "avg_score": stats["avg_score"], + "results": results, + "per_question_scores": stats["per_question_scores"], + "radar_metrics": stats["radar_metrics"], + "results_df": TestResult.load_all_df(), + } + except Exception as exc: # noqa: BLE001 + logger.error( + f"Errore durante l'esecuzione del test LLM: {type(exc).__name__} - {exc}" + ) return {} - avg_score = sum(r["evaluation"]["score"] for r in results.values()) / len(results) - result_data = { - "set_name": set_name, - "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "avg_score": avg_score, - "sample_type": "Generata da LLM", - "method": "LLM", - "generation_llm": gen_preset_config.get("model"), - "evaluation_llm": eval_preset_config.get("model"), - "questions": results, - } - - result_id = add_result(set_id, result_data) - results_df = load_results() - - return { - "result_id": result_id, - "avg_score": avg_score, - "results": results, - "results_df": results_df, - } - __all__ = [ "load_results", "refresh_results", - 
"add_result", - "save_results", - "import_results_from_file", - "calculate_statistics", + "import_results_action", + "generate_answer", "evaluate_answer", - "generate_example_answer_with_llm", - "execute_llm_test", + "run_test", ] diff --git a/initialize_db.py b/initialize_db.py index 2ab60d0..6da421f 100644 --- a/initialize_db.py +++ b/initialize_db.py @@ -1,5 +1,5 @@ import logging -from controllers.startup_controller import setup_logging +from utils.startup_utils import setup_logging logger = logging.getLogger(__name__) diff --git a/models/api_preset.py b/models/api_preset.py index edca062..8529f74 100644 --- a/models/api_preset.py +++ b/models/api_preset.py @@ -1,8 +1,7 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, asdict from typing import List -import pandas as pd from sqlalchemy import select from models.database import DatabaseEngine @@ -40,27 +39,29 @@ def load_all() -> List["APIPreset"]: ] @staticmethod - def save_df(df: pd.DataFrame) -> None: + def save(presets: List["APIPreset"]) -> None: + """Salva un elenco di preset API.""" with DatabaseEngine.instance().get_session() as session: existing_ids = session.execute(select(APIPresetORM.id)).scalars().all() - incoming_ids = df['id'].astype(str).tolist() + incoming_ids = [p.id for p in presets] + for del_id in set(existing_ids) - set(incoming_ids): obj = session.get(APIPresetORM, del_id) if obj: session.delete(obj) - for _, row in df.iterrows(): - params = {k: (None if pd.isna(v) else v) for k, v in row.to_dict().items()} - obj = session.get(APIPresetORM, params['id']) + + for preset in presets: + obj = session.get(APIPresetORM, preset.id) if obj: - obj.name = params['name'] - obj.provider_name = params['provider_name'] - obj.endpoint = params['endpoint'] - obj.api_key = params['api_key'] - obj.model = params['model'] - obj.temperature = params['temperature'] - obj.max_tokens = params['max_tokens'] + obj.name = preset.name + obj.provider_name = preset.provider_name + obj.endpoint = preset.endpoint + obj.api_key = preset.api_key + obj.model = preset.model + obj.temperature = preset.temperature + obj.max_tokens = preset.max_tokens else: - session.add(APIPresetORM(**params)) + session.add(APIPresetORM(**asdict(preset))) session.commit() @staticmethod diff --git a/models/question.py b/models/question.py index 1bf1c6c..2d099a4 100644 --- a/models/question.py +++ b/models/question.py @@ -1,12 +1,15 @@ import logging from dataclasses import dataclass -from typing import List, Optional +from typing import List, Optional, Tuple, Dict, Any import uuid +import pandas as pd from sqlalchemy import select, delete from models.database import DatabaseEngine from models.orm_models import QuestionORM, question_set_questions +from utils.data_format_utils import format_questions_for_view +from utils.file_reader_utils import read_questions, filter_new_rows logger = logging.getLogger(__name__) @@ -81,3 +84,91 @@ def delete(question_id: str) -> None: if q: session.delete(q) session.commit() + + @staticmethod + def _persist_entities(df: pd.DataFrame) -> Tuple[int, List[str]]: + """Persiste nuove domande da ``df`` evitando duplicati. + + Parametri + --------- + df: DataFrame + Dati delle domande normalizzati. + + Restituisce + ----------- + Tuple[int, list[str]] + Numero di domande importate ed elenco degli avvisi. 
+ """ + + warnings: List[str] = [] + with DatabaseEngine.instance().get_session() as session: + existing_ids = session.execute(select(QuestionORM.id)).scalars().all() + + df_unique = df.drop_duplicates(subset="id", keep="first") + duplicated_ids = set(df["id"].astype(str)) - set( + df_unique["id"].astype(str) + ) + for dup in duplicated_ids: + warnings.append( + f"Domanda con ID '{dup}' già presente nel file; saltata." + ) + + new_rows, added_count = filter_new_rows(df_unique, existing_ids) + skipped_ids = set(df_unique["id"].astype(str)) - set( + new_rows["id"].astype(str) + ) + for sid in skipped_ids: + warnings.append( + f"Domanda con ID '{sid}' già esistente; saltata." + ) + + if added_count > 0: + session.bulk_insert_mappings( + QuestionORM, new_rows.to_dict(orient="records") + ) + session.commit() + + return added_count, warnings + + @staticmethod + def import_from_file(file) -> Dict[str, Any]: + """Importa domande da un file CSV o JSON. + + Parametri + --------- + file: file-like + File contenente le domande da importare. + + Restituisce + ----------- + dict + ``{"success": bool, "imported_count": int, "warnings": list[str]}`` + """ + + try: + df = read_questions(file) + except ValueError as exc: + return {"success": False, "imported_count": 0, "warnings": [str(exc)]} + except Exception as exc: # pragma: no cover - defensive + return { + "success": False, + "imported_count": 0, + "warnings": [f"Errore durante la lettura del file: {exc}"], + } + + imported, warnings = Question._persist_entities(df) + + return {"success": True, "imported_count": imported, "warnings": warnings} + + @staticmethod + def filter_by_category( + category: Optional[str] = None, + ) -> Tuple[pd.DataFrame, List[str]]: + """Restituisce le domande filtrate per categoria e tutte le categorie.""" + + from utils.cache import get_questions # Import locale per evitare cicli + df = get_questions() + df, _, categories = format_questions_for_view(df) + filtered_df = df[df["categoria"] == category] if category else df + + return filtered_df, categories diff --git a/models/question_set.py b/models/question_set.py index 77cdcd9..c0619a9 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -1,15 +1,29 @@ import logging from dataclasses import dataclass, field -from typing import List, Optional +from typing import Any, Dict, List, Optional, Tuple import uuid +import pandas as pd from sqlalchemy import select +from utils.file_reader_utils import read_question_sets from models.database import DatabaseEngine from models.orm_models import QuestionSetORM, QuestionORM logger = logging.getLogger(__name__) +@dataclass +class PersistSetsResult: + """Risultato della funzione ``persist_sets``.""" + + sets_df: pd.DataFrame + questions_df: pd.DataFrame + sets_imported_count: int + new_questions_added_count: int + existing_questions_found_count: int + warnings: List[str] + + @dataclass class QuestionSet: id: str @@ -68,3 +82,174 @@ def delete(set_id: str) -> None: if qset: session.delete(qset) session.commit() + + @staticmethod + def _resolve_question_ids( + questions_in_set_data: List[Any], + current_questions: pd.DataFrame, + ) -> Tuple[List[str], pd.DataFrame, int, int, List[str]]: + """Risolve gli identificatori delle domande per un set di domande.""" + warnings: List[str] = [] + question_ids: List[str] = [] + new_added = 0 + existing_found = 0 + + for q_idx, q_data in enumerate(questions_in_set_data): + if isinstance(q_data, dict): + q_id = str(q_data.get("id", "")) + q_text = q_data.get("domanda", "") + q_answer = 
q_data.get("risposta_attesa", "") + q_category = q_data.get("categoria", "") + else: + q_id = str(q_data) + q_text = "" + q_answer = "" + q_category = "" + + if not q_id: + warnings.append(f"Domanda #{q_idx + 1} senza ID (saltata).") + continue + + if q_text and q_answer: + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + from controllers.question_controller import ( + add_question_if_not_exists, + ) + was_added = add_question_if_not_exists( + question_id=q_id, + domanda=q_text, + risposta_attesa=q_answer, + categoria=q_category, + ) + if was_added: + new_added += 1 + question_ids.append(q_id) + new_row = pd.DataFrame( + { + "id": [q_id], + "domanda": [q_text], + "risposta_attesa": [q_answer], + "categoria": [q_category], + } + ) + current_questions = pd.concat( + [current_questions, new_row], ignore_index=True + ) + else: + existing_found += 1 + question_ids.append(q_id) + continue + + if q_id in current_questions["id"].astype(str).values: + existing_found += 1 + question_ids.append(q_id) + else: + warnings.append( + f"Domanda #{q_idx + 1} con ID {q_id} non trovata e senza dettagli; saltata." + ) + + return question_ids, current_questions, new_added, existing_found, warnings + + @staticmethod + def _persist_entities( + sets_data: List[Dict[str, Any]], + current_questions: pd.DataFrame, + current_sets: pd.DataFrame, + ) -> "PersistSetsResult": + """Crea set di domande dai dati analizzati.""" + if not isinstance(sets_data, list): + raise ValueError("I dati dei set devono essere una lista.") + + sets_imported_count = 0 + new_questions_added_count = 0 + existing_questions_found_count = 0 + warnings: List[str] = [] + + for set_idx, set_data in enumerate(sets_data): + if not isinstance(set_data, dict): + warnings.append( + f"Elemento #{set_idx + 1} nella lista non è un set valido (saltato)." + ) + continue + + set_name = set_data.get("name") + questions_in_set_data = set_data.get("questions", []) + + if not set_name or not isinstance(set_name, str) or not set_name.strip(): + warnings.append( + f"Set #{set_idx + 1} con nome mancante o non valido (saltato)." + ) + continue + + if not isinstance(questions_in_set_data, list): + warnings.append( + f"Dati delle domande mancanti o non validi per il set '{set_name}' (saltato)." + ) + continue + + if set_name in current_sets.get("name", pd.Series([])).values: + warnings.append( + f"Un set con nome '{set_name}' esiste già. Saltato per evitare duplicati." + ) + continue + + ( + question_ids, + current_questions, + added, + existing, + q_warnings, + ) = QuestionSet._resolve_question_ids( + questions_in_set_data, current_questions + ) + warnings.extend(q_warnings) + + if question_ids or len(questions_in_set_data) == 0: + try: + QuestionSet.create(set_name, question_ids) + sets_imported_count += 1 + except Exception as e: # pragma: no cover - protective + warnings.append( + f"Errore durante la creazione del set '{set_name}': {e}" + ) + else: + warnings.append( + f"Il set '{set_name}' non è stato creato perché non conteneva domande valide." 
+ ) + + new_questions_added_count += added + existing_questions_found_count += existing + + from utils.cache import refresh_question_sets as _refresh_question_sets + sets_df = _refresh_question_sets() + + return PersistSetsResult( + sets_df=sets_df, + questions_df=current_questions, + sets_imported_count=sets_imported_count, + new_questions_added_count=new_questions_added_count, + existing_questions_found_count=existing_questions_found_count, + warnings=warnings, + ) + + @staticmethod + def import_from_file(uploaded_file) -> "PersistSetsResult": + """Importa uno o più set di domande da un file JSON o CSV.""" + + if uploaded_file is None: + raise ValueError("Nessun file fornito per l'importazione.") + + data = read_question_sets(uploaded_file) + from controllers.question_controller import load_questions + current_questions = load_questions() + from controllers.question_set_controller import load_sets + current_sets = load_sets() + persist_result = QuestionSet._persist_entities( + data, current_questions, current_sets + ) + + return persist_result + diff --git a/models/test_result.py b/models/test_result.py index 7b21961..50b512d 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -1,14 +1,16 @@ import logging -from dataclasses import dataclass -from typing import Dict, List +from dataclasses import dataclass, asdict import uuid -import json +from typing import Any, Dict, List, Tuple +from functools import lru_cache + import pandas as pd from sqlalchemy import select from models.database import DatabaseEngine from models.orm_models import TestResultORM +from utils.file_reader_utils import read_test_results, filter_new_rows logger = logging.getLogger(__name__) @@ -18,6 +20,7 @@ class TestResult: set_id: str timestamp: str results: Dict + __test__ = False @staticmethod def load_all() -> List["TestResult"]: @@ -34,35 +37,92 @@ def load_all() -> List["TestResult"]: ] @staticmethod - def save_df(df: pd.DataFrame) -> None: - df_to_save = df.copy() - if 'results' in df_to_save.columns: - df_to_save['results'] = df_to_save['results'].apply( - lambda x: json.dumps(x) if isinstance(x, dict) else '{}' + @lru_cache(maxsize=1) + def load_all_df() -> pd.DataFrame: + """Restituisce tutti i risultati come DataFrame pandas con caching.""" + data = [asdict(r) for r in TestResult.load_all()] + columns = ["id", "set_id", "timestamp", "results"] + return pd.DataFrame(data, columns=columns) + + @staticmethod + def refresh_cache() -> pd.DataFrame: + """Svuota e ricarica il DataFrame in cache dei risultati.""" + TestResult.load_all_df.cache_clear() + return TestResult.load_all_df() + + @staticmethod + def _persist_entities(imported_df: pd.DataFrame) -> int: + """Persiste nuovi risultati di test evitando duplicati. + + Parametri + --------- + imported_df: DataFrame + Dati dei risultati normalizzati. + + Restituisce + ----------- + int + Numero di nuovi risultati inseriti. + """ + + existing_df = TestResult.load_all_df() + existing_ids = ( + existing_df["id"].astype(str).tolist() if not existing_df.empty else [] + ) + new_rows, added_count = filter_new_rows(imported_df, existing_ids) + + if added_count > 0: + combined_df = pd.concat([existing_df, new_rows], ignore_index=True) + results = [ + TestResult(**row) for row in combined_df.to_dict(orient="records") + ] + TestResult.save(results) + TestResult.refresh_cache() + return added_count + + @staticmethod + def import_from_file(file) -> Tuple[bool, str]: + """Importa risultati di test da ``file``. 
+ + Il file è analizzato tramite :func:`utils.file_reader_utils.read_test_results`. + I risultati esistenti (corrispondenti per ``id``) vengono ignorati. Le nuove + voci vengono salvate e la cache viene aggiornata. + """ + + try: + imported_df = read_test_results(file) + added_count = TestResult._persist_entities(imported_df) + message = ( + f"Importati {added_count} risultati." + if added_count > 0 + else "Nessun nuovo risultato importato." ) + return True, message + except ValueError as e: + return False, str(e) + except Exception as e: # pragma: no cover + return False, f"Errore durante l'importazione dei risultati: {str(e)}" + + @staticmethod + def save(results: List["TestResult"]) -> None: + """Salva un elenco di risultati di test.""" with DatabaseEngine.instance().get_session() as session: existing_ids = session.execute(select(TestResultORM.id)).scalars().all() - incoming_ids = df_to_save['id'].astype(str).tolist() + incoming_ids = [r.id for r in results] + for rid in set(existing_ids) - set(incoming_ids): obj = session.get(TestResultORM, rid) if obj: session.delete(obj) - for _, row in df_to_save.iterrows(): - params = row.to_dict() - obj = session.get(TestResultORM, params['id']) + + for result in results: + obj = session.get(TestResultORM, result.id) if obj: - obj.set_id = params['set_id'] - obj.timestamp = params['timestamp'] - obj.results = json.loads(params['results']) + obj.set_id = result.set_id + obj.timestamp = result.timestamp + obj.results = result.results else: - session.add( - TestResultORM( - id=params['id'], - set_id=params['set_id'], - timestamp=params['timestamp'], - results=json.loads(params['results']), - ) - ) + session.add(TestResultORM(**asdict(result))) session.commit() @staticmethod @@ -79,3 +139,52 @@ def add(set_id: str, results_data: Dict) -> str: ) session.commit() return result_id + + @staticmethod + def add_and_refresh(set_id: str, results_data: Dict) -> str: + """Salva un singolo risultato e aggiorna il DataFrame in cache.""" + rid = TestResult.add(set_id, results_data) + TestResult.refresh_cache() + return rid + + @staticmethod + def calculate_statistics( + questions_results: Dict[str, Dict[str, Any]] + ) -> Dict[str, Any]: + if not questions_results: + return { + "avg_score": 0, + "per_question_scores": [], + "radar_metrics": { + "similarity": 0, + "correctness": 0, + "completeness": 0, + }, + } + + per_question_scores: List[Dict[str, Any]] = [] + radar_sums = {"similarity": 0, "correctness": 0, "completeness": 0} + + for qdata in questions_results.values(): + evaluation = qdata.get("evaluation", {}) + score = evaluation.get("score", 0) + per_question_scores.append( + {"question": qdata.get("question", "Domanda"), "score": score} + ) + for metric in radar_sums.keys(): + radar_sums[metric] += evaluation.get(metric, 0) + + count = len(per_question_scores) + avg_score = ( + sum(item["score"] for item in per_question_scores) / count if count > 0 else 0 + ) + radar_metrics = { + metric: radar_sums[metric] / count if count > 0 else 0 + for metric in radar_sums + } + return { + "avg_score": avg_score, + "per_question_scores": per_question_scores, + "radar_metrics": radar_metrics, + } + diff --git a/tests/sample_data/question_sets.csv b/tests/sample_data/question_sets.csv new file mode 100644 index 0000000..beb1afa --- /dev/null +++ b/tests/sample_data/question_sets.csv @@ -0,0 +1,5 @@ +name,id,domanda,risposta_attesa,categoria +Set1,q1,Existing question?,Answer1,cat1 +Set2,q1,,, +Set2,q2,New question?,Answer2,cat2 +Set2,,No ID question?,No ID answer,cat3 
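The sample CSV above feeds the new import path introduced in this patch (`read_question_sets` in `utils/file_reader_utils.py` plus `QuestionSet.import_from_file`, which returns a `PersistSetsResult`). As a rough sketch of how that flow could be exercised outside Streamlit — `FakeUpload` is a hypothetical stand-in for an uploaded-file object and is not part of the patch, and the database is assumed to be already initialized — the import might be driven like this:

```python
# Illustrative sketch only (not part of the patch): drive the new import flow
# with the sample CSV above. FakeUpload is a hypothetical stand-in for a
# Streamlit UploadedFile; it only needs a `.name` plus the usual file-like API.
import io

from models.question_set import QuestionSet


class FakeUpload(io.BytesIO):
    def __init__(self, path: str):
        with open(path, "rb") as fh:
            super().__init__(fh.read())
        self.name = path  # the readers dispatch on the file extension


# Assumes DatabaseEngine has already been initialized for this process.
result = QuestionSet.import_from_file(
    FakeUpload("tests/sample_data/question_sets.csv")
)
print(result.sets_imported_count, "set importati")
for warning in result.warnings:
    print("avviso:", warning)
```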
diff --git a/tests/sample_data/question_sets.json b/tests/sample_data/question_sets.json new file mode 100644 index 0000000..715d273 --- /dev/null +++ b/tests/sample_data/question_sets.json @@ -0,0 +1,8 @@ +[ + {"name": "Set1", "questions": []}, + {"name": "Set2", "questions": [ + {"id": "q1"}, + {"id": "q2", "domanda": "New question?", "risposta_attesa": "Answer2", "categoria": "cat2"}, + {"domanda": "No ID question?", "risposta_attesa": "No ID answer", "categoria": "cat3"} + ]} +] diff --git a/tests/sample_data/questions.csv b/tests/sample_data/questions.csv new file mode 100644 index 0000000..e0425fa --- /dev/null +++ b/tests/sample_data/questions.csv @@ -0,0 +1,4 @@ +id,domanda,risposta_attesa,categoria +q1,Existing question?,Answer1,cat1 +q2,New question?,Answer2,cat2 +q2,Duplicate question?,Answer3,cat3 diff --git a/tests/sample_data/questions.json b/tests/sample_data/questions.json new file mode 100644 index 0000000..3a20528 --- /dev/null +++ b/tests/sample_data/questions.json @@ -0,0 +1,5 @@ +[ + {"id": "q1", "domanda": "Existing question?", "risposta_attesa": "Answer1", "categoria": "cat1"}, + {"id": "q2", "domanda": "New question?", "risposta_attesa": "Answer2", "categoria": "cat2"}, + {"id": "q2", "domanda": "Duplicate question?", "risposta_attesa": "Answer3", "categoria": "cat3"} +] diff --git a/tests/sample_data/test_results.csv b/tests/sample_data/test_results.csv new file mode 100644 index 0000000..80866f3 --- /dev/null +++ b/tests/sample_data/test_results.csv @@ -0,0 +1,3 @@ +id,set_id,timestamp,results +1,s1,2023-01-01,{} +2,s2,2023-01-02,{} diff --git a/tests/sample_data/test_results.json b/tests/sample_data/test_results.json new file mode 100644 index 0000000..0005229 --- /dev/null +++ b/tests/sample_data/test_results.json @@ -0,0 +1,4 @@ +[ + {"id": "1", "set_id": "s1", "timestamp": "2023-01-01", "results": {}}, + {"id": "2", "set_id": "s2", "timestamp": "2023-01-02", "results": {}} +] diff --git a/tests/test_api_preset_controller.py b/tests/test_api_preset_controller.py index a34da64..aa6557e 100644 --- a/tests/test_api_preset_controller.py +++ b/tests/test_api_preset_controller.py @@ -34,10 +34,10 @@ def test_validate_preset_ok(mock_load): @patch("controllers.api_preset_controller.refresh_api_presets") -@patch("controllers.api_preset_controller.APIPreset.save_df") +@patch("controllers.api_preset_controller.APIPreset.save") @patch("controllers.api_preset_controller.load_presets") @patch("controllers.api_preset_controller.uuid.uuid4", return_value="new-id") -def test_save_preset_new(mock_uuid, mock_load, mock_save_df, mock_refresh): +def test_save_preset_new(mock_uuid, mock_load, mock_save, mock_refresh): df = pd.DataFrame([ { "id": "1", @@ -67,10 +67,10 @@ def test_save_preset_new(mock_uuid, mock_load, mock_save_df, mock_refresh): assert ok is True assert "creato" in msg assert returned_df is updated_df - mock_save_df.assert_called_once() - saved_df = mock_save_df.call_args[0][0] - assert "new-id" in saved_df["id"].values - assert "New" in saved_df["name"].values + mock_save.assert_called_once() + saved_presets = mock_save.call_args[0][0] + assert any(p.id == "new-id" for p in saved_presets) + assert any(p.name == "New" for p in saved_presets) @patch("controllers.api_preset_controller.refresh_api_presets") @@ -99,7 +99,7 @@ def test_delete_preset(mock_load, mock_delete, mock_refresh): mock_delete.assert_called_once_with("1") -@patch("controllers.api_preset_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def 
test_test_api_connection_delegates(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client diff --git a/tests/test_data_format_utils.py b/tests/test_data_format_utils.py new file mode 100644 index 0000000..febb97c --- /dev/null +++ b/tests/test_data_format_utils.py @@ -0,0 +1,39 @@ +import pandas as pd + +from utils.data_format_utils import build_questions_detail, format_questions_for_view + + +def test_format_questions_for_view_no_category(): + df = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + } + ) + norm_df, question_map, categories = format_questions_for_view(df) + + assert "categoria" in norm_df.columns + assert norm_df.iloc[0]["categoria"] == "N/A" + assert categories == ["N/A"] + assert question_map == {"1": {"domanda": "d1", "categoria": "N/A"}} + + +def test_format_questions_for_view_empty_df(): + df = pd.DataFrame() + norm_df, question_map, categories = format_questions_for_view(df) + + assert list(norm_df.columns) == ["id", "domanda", "risposta_attesa", "categoria"] + assert norm_df.empty + assert question_map == {} + assert categories == [] + + +def test_build_questions_detail(): + question_map = {"1": {"domanda": "d1", "categoria": "A"}} + details = build_questions_detail(question_map, ["1", "2"]) + assert details == [ + {"id": "1", "domanda": "d1", "categoria": "A"}, + {"id": "2", "domanda": "", "categoria": "N/A"}, + ] + assert build_questions_detail(question_map, "notalist") == [] diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py index fd7b9f9..35481f6 100644 --- a/tests/test_evaluate_answer.py +++ b/tests/test_evaluate_answer.py @@ -4,9 +4,11 @@ import sys from unittest.mock import Mock, patch +import pytest + sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from controllers import test_controller # noqa: E402 +from controllers.test_controller import evaluate_answer # noqa: E402 def _mock_response(content: str): @@ -24,7 +26,7 @@ def _mock_response_no_choices(): return mock_resp -@patch("controllers.test_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def test_evaluate_answer_success(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client @@ -40,7 +42,7 @@ def test_evaluate_answer_success(mock_get_client): json.dumps(evaluation) ) - result = test_controller.evaluate_answer( + result = evaluate_answer( "q", "expected", "actual", {"api_key": "key"} ) @@ -48,40 +50,36 @@ def test_evaluate_answer_success(mock_get_client): assert result["similarity"] == 90 -@patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) +@patch("utils.openai_client.get_openai_client", return_value=None) def test_evaluate_answer_no_client(mock_get_client): - result = test_controller.evaluate_answer( - "q", "expected", "actual", {"api_key": None} - ) - - assert result["score"] == 0 - assert "Client API" in result["explanation"] + with pytest.raises(ValueError): + evaluate_answer( + "q", "expected", "actual", {"api_key": None} + ) -@patch("controllers.test_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def test_evaluate_answer_json_decode_error(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client mock_client.chat.completions.create.return_value = _mock_response("not json") - result = test_controller.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"} - ) - - assert result["score"] == 0 - assert "Errore di 
decodifica JSON" in result["explanation"] + with pytest.raises(ValueError): + evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) -@patch("controllers.test_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def test_evaluate_answer_no_choices(mock_get_client, caplog): mock_client = Mock() mock_get_client.return_value = mock_client mock_client.chat.completions.create.return_value = _mock_response_no_choices() with caplog.at_level(logging.ERROR): - result = test_controller.evaluate_answer( - "q", "expected", "actual", {"api_key": "key"} - ) + with pytest.raises(RuntimeError): + evaluate_answer( + "q", "expected", "actual", {"api_key": "key"} + ) - assert result["score"] == 0 assert "choices" in caplog.text diff --git a/tests/test_import_results.py b/tests/test_import_results.py new file mode 100644 index 0000000..be58f93 --- /dev/null +++ b/tests/test_import_results.py @@ -0,0 +1,38 @@ +import os +import sys +from unittest.mock import patch + +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from models.test_result import TestResult + + +data_dir = os.path.join(os.path.dirname(__file__), "sample_data") + + +@pytest.mark.parametrize("filename", ["test_results.csv", "test_results.json"]) +@patch("models.test_result.TestResult.refresh_cache") +@patch("models.test_result.TestResult.save") +@patch("models.test_result.TestResult.load_all_df") +def test_import_from_file_skips_duplicates_and_saves( + mock_load, + mock_save, + mock_refresh, + filename, +): + mock_load.return_value = pd.DataFrame( + [{"id": "1", "set_id": "s1", "timestamp": "t0", "results": {}}] + ) + with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: + success, message = TestResult.import_from_file(f) + + assert success is True + assert message == "Importati 1 risultati." 
+ mock_save.assert_called_once() + saved = mock_save.call_args[0][0] + assert {r.id for r in saved} == {"1", "2"} + mock_refresh.assert_called_once() + diff --git a/tests/test_openai_controllers.py b/tests/test_openai_controllers.py index a7d5a70..896a64d 100644 --- a/tests/test_openai_controllers.py +++ b/tests/test_openai_controllers.py @@ -2,10 +2,13 @@ import sys from unittest.mock import Mock, patch +import pytest + # Aggiunge la cartella principale al percorso dei moduli per i test sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from controllers import api_preset_controller, test_controller # noqa: E402 +from controllers import api_preset_controller # noqa: E402 +from controllers.test_controller import generate_answer # noqa: E402 def _mock_response(content: str): @@ -18,42 +21,32 @@ def _mock_response(content: str): return mock_resp -@patch("controllers.test_controller.openai_client.get_openai_client") -def test_generate_example_answer_success(mock_get_client): +@patch("utils.openai_client.get_openai_client") +def test_generate_answer_success(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client mock_client.chat.completions.create.return_value = _mock_response(" answer ") - result = test_controller.generate_example_answer_with_llm( - "question", {"api_key": "key"} - ) - - assert result["answer"] == "answer" + result = generate_answer("question", {"api_key": "key"}) + assert result == "answer" -@patch("controllers.test_controller.openai_client.get_openai_client", return_value=None) -def test_generate_example_answer_no_client(mock_get_client): - result = test_controller.generate_example_answer_with_llm( - "question", {"api_key": None} - ) - assert result["answer"] is None - assert result["error"] == "Client API non configurato" +@patch("utils.openai_client.get_openai_client", return_value=None) +def test_generate_answer_no_client(mock_get_client): + with pytest.raises(ValueError): + generate_answer("question", {"api_key": None}) -@patch("controllers.test_controller.openai_client.get_openai_client") -def test_generate_example_answer_empty_question(mock_get_client): +@patch("utils.openai_client.get_openai_client") +def test_generate_answer_empty_question(mock_get_client): mock_get_client.return_value = Mock() - result = test_controller.generate_example_answer_with_llm( - "", {"api_key": "key"} - ) - - assert result["answer"] is None - assert result["error"] == "Domanda vuota o non valida" + with pytest.raises(ValueError): + generate_answer("", {"api_key": "key"}) -@patch("controllers.api_preset_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def test_test_api_connection_success(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client @@ -69,7 +62,7 @@ def test_test_api_connection_success(mock_get_client): assert msg == "Connessione API riuscita!" 
-@patch("controllers.api_preset_controller.openai_client.get_openai_client") +@patch("utils.openai_client.get_openai_client") def test_test_api_connection_unexpected_response(mock_get_client): mock_client = Mock() mock_get_client.return_value = mock_client @@ -83,7 +76,7 @@ def test_test_api_connection_unexpected_response(mock_get_client): assert "Risposta inattesa" in msg -@patch("controllers.api_preset_controller.openai_client.get_openai_client", return_value=None) +@patch("utils.openai_client.get_openai_client", return_value=None) def test_test_api_connection_no_client(mock_get_client): ok, msg = api_preset_controller.test_api_connection( "key", "endpoint", "model", 0.1, 10 diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index 6fa7135..287ac39 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -2,8 +2,8 @@ import sys from unittest.mock import patch -import io import pandas as pd +import pytest sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -79,19 +79,182 @@ def test_delete_question(mock_delete, mock_refresh): mock_delete.assert_called_once_with("qid") mock_refresh.assert_called_once() +@patch("controllers.question_controller.Question.filter_by_category") +def test_get_filtered_questions(mock_filter): + df = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + "categoria": ["cat1"], + } + ) + mock_filter.return_value = (df, ["cat1", "cat2"]) + + questions, categories = question_controller.get_filtered_questions("cat1") + mock_filter.assert_called_once_with("cat1") + assert categories == ["cat1", "cat2"] + assert questions["id"].tolist() == ["1"] + + +@patch("utils.cache.get_questions") +def test_filter_by_category(mock_get_questions): + mock_get_questions.return_value = pd.DataFrame( + { + "id": ["1", "2"], + "domanda": ["d1", "d2"], + "risposta_attesa": ["a1", "a2"], + "categoria": ["cat1", "cat2"], + } + ) + + filtered_df, categories = question_controller.Question.filter_by_category("cat1") + assert categories == ["cat1", "cat2"] + assert filtered_df["id"].tolist() == ["1"] + + +@patch("utils.cache.get_questions") +def test_filter_by_category_no_category_column(mock_get_questions): + mock_get_questions.return_value = pd.DataFrame( + { + "id": ["1"], + "domanda": ["d1"], + "risposta_attesa": ["a1"], + } + ) + + filtered_df, categories = question_controller.Question.filter_by_category() + assert "categoria" in filtered_df.columns + assert filtered_df.iloc[0]["categoria"] == "N/A" + assert categories == ["N/A"] + + +@patch("utils.cache.get_questions") +def test_filter_by_category_empty_df(mock_get_questions): + mock_get_questions.return_value = pd.DataFrame() + + filtered_df, categories = question_controller.Question.filter_by_category() + assert filtered_df.empty + assert categories == [] + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.load_questions") +def test_get_question_text_found(mock_load, mock_refresh): + mock_load.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) + text = question_controller.get_question_text("1") + mock_refresh.assert_not_called() + assert text == "Q1" + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.load_questions") +def test_get_question_text_refresh(mock_load, mock_refresh): + mock_load.return_value = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) + text = 
question_controller.get_question_text("1") + mock_refresh.assert_called_once() + assert text == "Q1" + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.load_questions") +def test_get_question_category_found(mock_load, mock_refresh): + mock_load.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) + cat = question_controller.get_question_category("1") + mock_refresh.assert_not_called() + assert cat == "C1" + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.load_questions") +def test_get_question_category_refresh(mock_load, mock_refresh): + mock_load.return_value = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) + cat = question_controller.get_question_category("1") + mock_refresh.assert_called_once() + assert cat == "C1" + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.update_question") +def test_save_question_action_success(mock_update, mock_refresh): + mock_update.return_value = True + df = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = df + + result = question_controller.save_question_action("1", "q", "a", "c") + + mock_update.assert_called_once_with( + "1", domanda="q", risposta_attesa="a", categoria="c" + ) + mock_refresh.assert_called_once() + assert result["success"] is True + assert result["questions_df"].equals(df) + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.update_question") +def test_save_question_action_failure(mock_update, mock_refresh): + mock_update.return_value = False + result = question_controller.save_question_action("1", "q", "a", "c") + + mock_refresh.assert_not_called() + assert result["success"] is False + assert result["questions_df"] is None -def test_import_questions_from_file_invalid_json(): - file = io.StringIO("not json") - file.name = "bad.json" - success, message = question_controller.import_questions_from_file(file) - assert not success - assert message == "Il formato del file json non è valido" +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.delete_question") +def test_delete_question_action(mock_delete, mock_refresh): + df = pd.DataFrame() + mock_refresh.return_value = df + result = question_controller.delete_question_action("1") -def test_import_questions_from_file_invalid_csv(): - file = io.StringIO("id,domanda\n\"1,Test") - file.name = "bad.csv" - success, message = question_controller.import_questions_from_file(file) - assert not success - assert message == "Il formato del file csv non è valido" + mock_delete.assert_called_once_with("1") + mock_refresh.assert_called_once() + assert result.equals(df) + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.import_from_file") +def test_import_questions_action_success(mock_import, mock_refresh): + mock_import.return_value = { + "success": True, + "imported_count": 1, + "warnings": ["w"], + } + df = pd.DataFrame({"id": ["1"]}) + mock_refresh.return_value = df + + uploaded_file = object() + result = question_controller.import_questions_action(uploaded_file) + + mock_import.assert_called_once_with(uploaded_file) + mock_refresh.assert_called_once() + assert result["imported_count"] == 1 + assert result["warnings"] == ["w"] + assert result["questions_df"].equals(df) + + +def 
test_import_questions_action_no_file(): + with pytest.raises(ValueError, match="Nessun file caricato."): + question_controller.import_questions_action(None) + + +@patch("controllers.question_controller.refresh_questions") +@patch("controllers.question_controller.Question.import_from_file") +def test_import_questions_action_failure(mock_import, mock_refresh): + mock_import.return_value = { + "success": False, + "imported_count": 0, + "warnings": ["err"], + } + + with pytest.raises(ValueError, match="err"): + question_controller.import_questions_action(object()) + + assert mock_import.return_value["imported_count"] == 0 + assert mock_import.return_value["warnings"] == ["err"] + mock_refresh.assert_not_called() diff --git a/tests/test_question_import.py b/tests/test_question_import.py new file mode 100644 index 0000000..2378698 --- /dev/null +++ b/tests/test_question_import.py @@ -0,0 +1,65 @@ +import os +import sys +from unittest.mock import patch + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from models.question import Question + + +class DummySession: + def __init__(self): + self.inserted = [] + + def execute(self, *_args, **_kwargs): + class Result: + def scalars(self_inner): + class Scal: + def all(self_inner2): + return ["q1"] + return Scal() + return Result() + + def bulk_insert_mappings(self, _orm, data): + self.inserted.extend(data) + + def commit(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + pass + + +class DummyEngine: + def __init__(self): + self.session = DummySession() + + def get_session(self): + return self.session + + +@patch("models.question.DatabaseEngine.instance") +def test_import_from_file_skips_duplicates_and_adds_new(mock_engine): + engine = DummyEngine() + mock_engine.return_value = engine + data_dir = os.path.join(os.path.dirname(__file__), "sample_data") + + for filename in ["questions.csv", "questions.json"]: + engine.session.inserted.clear() + with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: + result = Question.import_from_file(f) + assert result["success"] is True + assert result["imported_count"] == 1 + assert any("q1" in w for w in result["warnings"]) + assert engine.session.inserted == [ + { + "id": "q2", + "domanda": "New question?", + "risposta_attesa": "Answer2", + "categoria": "cat2", + } + ] + diff --git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py index 56db2e3..60fdf78 100644 --- a/tests/test_question_set_controller.py +++ b/tests/test_question_set_controller.py @@ -1,6 +1,7 @@ import os import sys from unittest.mock import patch +import pandas as pd sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -35,3 +36,33 @@ def test_delete_set_controller(mock_delete, mock_refresh): mock_delete.assert_called_once_with("sid") mock_refresh.assert_called_once() + +@patch("controllers.question_set_controller._get_question_sets") +@patch("controllers.question_set_controller._get_questions") +def test_prepare_sets_for_view(mock_get_questions, mock_get_sets): + questions_df = pd.DataFrame( + { + "id": ["1", "2"], + "domanda": ["d1", "d2"], + "risposta_attesa": ["a1", "a2"], + "categoria": ["A", "B"], + } + ) + sets_df = pd.DataFrame( + { + "id": ["s1", "s2"], + "name": ["set1", "set2"], + "questions": [["1"], ["2"]], + } + ) + + mock_get_questions.return_value = questions_df + mock_get_sets.return_value = sets_df + + result = question_set_controller.prepare_sets_for_view(["A"]) + + assert result["categories"] == ["A", "B"] + 
assert result["sets_df"]["id"].tolist() == ["s1"] + assert result["sets_df"].iloc[0]["questions_detail"] == [ + {"id": "1", "domanda": "d1", "categoria": "A"} + ] diff --git a/tests/test_question_set_import.py b/tests/test_question_set_import.py new file mode 100644 index 0000000..eec3e91 --- /dev/null +++ b/tests/test_question_set_import.py @@ -0,0 +1,48 @@ +import os +import sys +from unittest.mock import patch + +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from models.question_set import QuestionSet + + +data_dir = os.path.join(os.path.dirname(__file__), "sample_data") + + +@pytest.mark.parametrize("filename", ["question_sets.json", "question_sets.csv"]) +@patch("controllers.question_controller.add_question_if_not_exists") +@patch("models.question_set.QuestionSet.create") +@patch("controllers.question_set_controller.load_sets") +@patch("controllers.question_controller.load_questions") +@patch("utils.cache.refresh_question_sets", return_value=pd.DataFrame()) +def test_import_from_file_handles_duplicates( + mock_refresh, + mock_load_questions, + mock_load_sets, + mock_create, + mock_add_question, + filename, +): + mock_load_questions.return_value = pd.DataFrame( + {"id": ["q1"], "domanda": ["Existing"], "risposta_attesa": ["A1"], "categoria": ["cat1"]} + ) + mock_load_sets.return_value = pd.DataFrame( + {"id": ["s1"], "name": ["Set1"], "questions": [[]]} + ) + mock_add_question.side_effect = lambda question_id, domanda, risposta_attesa, categoria: question_id == "q2" + + with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: + result = QuestionSet.import_from_file(f) + + assert result.sets_imported_count == 1 + assert result.new_questions_added_count == 1 + assert result.existing_questions_found_count == 1 + assert any("Set1" in w for w in result.warnings) + assert any("senza ID" in w for w in result.warnings) + mock_create.assert_called_once_with("Set2", ["q1", "q2"]) + assert mock_add_question.call_count == 1 + diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py index b70b24b..dad79bd 100644 --- a/tests/test_question_set_importer.py +++ b/tests/test_question_set_importer.py @@ -9,31 +9,30 @@ import pandas as pd # noqa: E402 import pytest # noqa: E402 -from controllers.question_set_controller import ( # noqa: E402 - parse_input, - resolve_question_ids, - persist_sets, - import_sets_from_file, +from models.question_set import ( + QuestionSet, + PersistSetsResult, ) +from utils.file_reader_utils import read_question_sets -def test_parse_input_csv_missing_columns(): +def test_read_question_sets_csv_missing_columns(): csv_content = "name,id,domanda\nset1,1,Domanda" file = io.StringIO(csv_content) file.name = "test.csv" with pytest.raises(ValueError): - parse_input(file) + read_question_sets(file) -def test_parse_input_json_not_list(): +def test_read_question_sets_json_not_list(): data = {"name": "set1"} file = io.BytesIO(json.dumps(data).encode("utf-8")) file.name = "test.json" with pytest.raises(ValueError): - parse_input(file) + read_question_sets(file) -@patch("controllers.question_set_controller.add_question_if_not_exists") +@patch("controllers.question_controller.add_question_if_not_exists") def test_resolve_question_ids_adds_and_existing(mock_add): mock_add.return_value = True current_questions = pd.DataFrame( @@ -43,9 +42,13 @@ def test_resolve_question_ids_adds_and_existing(mock_add): {"id": "1", "domanda": "Q1", "risposta_attesa": "A1", "categoria": ""}, {"id": "2"}, ] - 
ids, updated_df, new_added, existing_found, warnings = resolve_question_ids( - questions, current_questions - ) + ( + ids, + updated_df, + new_added, + existing_found, + warnings, + ) = QuestionSet._resolve_question_ids(questions, current_questions) assert ids == ["1", "2"] assert new_added == 1 assert existing_found == 1 @@ -59,9 +62,13 @@ def test_resolve_question_ids_missing_id(): columns=["id", "domanda", "risposta_attesa", "categoria"] ) questions = [{"domanda": "Q", "risposta_attesa": "A"}] - ids, updated_df, new_added, existing_found, warnings = resolve_question_ids( - questions, current_questions - ) + ( + ids, + updated_df, + new_added, + existing_found, + warnings, + ) = QuestionSet._resolve_question_ids(questions, current_questions) assert ids == [] assert new_added == 0 assert existing_found == 0 @@ -69,8 +76,8 @@ def test_resolve_question_ids_missing_id(): assert updated_df.empty -@patch("controllers.question_set_controller.refresh_question_sets") -@patch("controllers.question_set_controller.QuestionSet.create") +@patch("utils.cache.refresh_question_sets") +@patch("models.question_set.QuestionSet.create") def test_persist_sets_skips_duplicates(mock_create, mock_refresh): mock_refresh.return_value = pd.DataFrame( [{"id": "s1", "name": "Existing", "questions": []}] @@ -85,53 +92,9 @@ def test_persist_sets_skips_duplicates(mock_create, mock_refresh): {"name": "Existing", "questions": []}, {"name": "New", "questions": []}, ] - result = persist_sets(sets_data, current_questions, current_sets) - assert result["sets_imported_count"] == 1 - assert any("esiste già" in w for w in result["warnings"]) + result = QuestionSet._persist_entities(sets_data, current_questions, current_sets) + assert result.sets_imported_count == 1 + assert result.new_questions_added_count == 0 + assert result.existing_questions_found_count == 0 + assert any("esiste già" in w for w in result.warnings) mock_create.assert_called_once_with("New", []) - - -def test_import_sets_from_file_none(): - result = import_sets_from_file(None) - assert not result["success"] - assert "Nessun file" in result["error_message"] - - -def test_import_sets_from_file_invalid_json(): - file = io.BytesIO(b"not json") - file.name = "bad.json" - result = import_sets_from_file(file) - assert result["error_message"] == "Il formato del file json non è valido" - assert not result["success"] - - -def test_import_sets_from_file_invalid_csv(): - file = io.BytesIO(b"id,domanda\n\"1,Test") - file.name = "bad.csv" - result = import_sets_from_file(file) - assert result["error_message"] == "Il formato del file csv non è valido" - assert not result["success"] - - -def test_import_sets_from_file_duplicates_no_error(): - data = [{"name": "Existing", "questions": []}] - file = io.BytesIO(json.dumps(data).encode("utf-8")) - file.name = "test.json" - with ( - patch("controllers.question_set_controller.load_questions") as mock_lq, - patch("controllers.question_set_controller.load_sets") as mock_ls, - patch("controllers.question_set_controller.persist_sets") as mock_ps, - ): - mock_lq.return_value = pd.DataFrame() - mock_ls.return_value = pd.DataFrame() - mock_ps.return_value = { - "success": False, - "success_message": "", - "questions_df": pd.DataFrame(), - "sets_df": pd.DataFrame(), - "warnings": ["dup"], - } - result = import_sets_from_file(file) - assert result["error_message"] == "" - assert result["warnings"] == ["dup"] - assert not result["success"] diff --git a/tests/test_set_helpers.py b/tests/test_set_helpers.py new file mode 100644 index 
0000000..b476ecf --- /dev/null +++ b/tests/test_set_helpers.py @@ -0,0 +1,76 @@ +import pandas as pd + +from views.state_models import SetPageState +from models.question_set import PersistSetsResult + +# create dummy st object + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + def __setattr__(self, name, value): + self[name] = value + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.captured_warnings = [] + def warning(self, msg): + self.captured_warnings.append(msg) + + +def test_import_set_callback_message_and_warnings(monkeypatch): + from views import set_helpers + + dummy_st = DummySt() + dummy_st.session_state.uploaded_file_content_set = object() + monkeypatch.setattr(set_helpers, "st", dummy_st) + + result = PersistSetsResult( + sets_df=pd.DataFrame(), + questions_df=pd.DataFrame(), + sets_imported_count=2, + new_questions_added_count=1, + existing_questions_found_count=0, + warnings=["w1", "w2"], + ) + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _: result) + + state = SetPageState() + set_helpers.import_set_callback(state) + + assert state.import_set_success is True + assert state.import_set_success_message == "2 set importati. 1 nuove domande aggiunte." + assert dummy_st.captured_warnings == ["w1", "w2"] + assert isinstance(dummy_st.session_state.questions, pd.DataFrame) + assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + assert dummy_st.session_state.uploaded_file_content_set is None + + +def test_import_set_callback_no_imports_with_warnings(monkeypatch): + from views import set_helpers + + dummy_st = DummySt() + dummy_st.session_state.uploaded_file_content_set = object() + monkeypatch.setattr(set_helpers, "st", dummy_st) + + result = PersistSetsResult( + sets_df=pd.DataFrame(), + questions_df=pd.DataFrame(), + sets_imported_count=0, + new_questions_added_count=0, + existing_questions_found_count=0, + warnings=["warn"], + ) + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _: result) + + state = SetPageState() + set_helpers.import_set_callback(state) + + assert state.import_set_success is True + assert ( + state.import_set_success_message + == "Nessun set importato. Controlla gli avvisi." + ) + assert dummy_st.captured_warnings == ["warn"] + assert dummy_st.session_state.uploaded_file_content_set is None diff --git a/utils/cache.py b/utils/cache.py index 0b9a97d..34b4356 100644 --- a/utils/cache.py +++ b/utils/cache.py @@ -53,13 +53,9 @@ def refresh_api_presets() -> pd.DataFrame: return get_api_presets() -@lru_cache(maxsize=1) def get_results() -> pd.DataFrame: - data = [asdict(r) for r in TestResult.load_all()] - columns = ["id", "set_id", "timestamp", "results"] - return pd.DataFrame(data, columns=columns) + return TestResult.load_all_df() def refresh_results() -> pd.DataFrame: - get_results.cache_clear() - return get_results() + return TestResult.refresh_cache() diff --git a/utils/data_format_utils.py b/utils/data_format_utils.py new file mode 100644 index 0000000..0d2023b --- /dev/null +++ b/utils/data_format_utils.py @@ -0,0 +1,58 @@ +import pandas as pd +from typing import Any, Dict, List, Tuple + + +def format_questions_for_view( + questions_df: pd.DataFrame, +) -> Tuple[pd.DataFrame, Dict[str, Dict[str, str]], List[str]]: + """Normalizza il DataFrame delle domande per la visualizzazione. + + Garantisce che la colonna ``categoria`` esista e sia riempita con ``N/A`` quando + mancante. 
Restituisce il DataFrame normalizzato, una mappa degli ID delle domande + ai rispettivi testi e categorie e l'elenco ordinato delle categorie. + """ + if questions_df is None or questions_df.empty: + df = pd.DataFrame(columns=["id", "domanda", "risposta_attesa", "categoria"]) + categories: List[str] = [] + else: + df = questions_df.copy() + if "categoria" not in df.columns: + df["categoria"] = "N/A" + else: + df["categoria"] = df["categoria"].fillna("N/A") + categories = sorted(list(df["categoria"].astype(str).unique())) + + question_map: Dict[str, Dict[str, str]] = { + str(row.get("id", "")): { + "domanda": row.get("domanda", ""), + "categoria": row.get("categoria", "N/A"), + } + for _, row in df.iterrows() + } + + return df, question_map, categories + + +def build_questions_detail( + question_map: Dict[str, Dict[str, str]], + q_ids: Any, +) -> List[Dict[str, str]]: + """Restituisce i dettagli delle domande per ``q_ids`` usando ``question_map``. + + Ogni elemento della lista restituita contiene ``id``, ``domanda`` e ``categoria`` + della domanda. Gli ID non corrispondenti producono testo vuoto con categoria + ``N/A``. Se ``q_ids`` non è una lista, viene restituita una lista vuota. + """ + + details: List[Dict[str, str]] = [] + if isinstance(q_ids, list): + for q_id in q_ids: + info = question_map.get(str(q_id), {}) + details.append( + { + "id": str(q_id), + "domanda": info.get("domanda", ""), + "categoria": info.get("categoria", "N/A"), + } + ) + return details diff --git a/utils/file_reader_utils.py b/utils/file_reader_utils.py new file mode 100644 index 0000000..ed9df84 --- /dev/null +++ b/utils/file_reader_utils.py @@ -0,0 +1,218 @@ +import os +import json +import uuid +from datetime import datetime +from typing import List, Dict, Any, Iterable, Tuple + +import pandas as pd + +__all__ = [ + "read_questions", + "read_question_sets", + "read_test_results", + "filter_new_rows", +] + +REQUIRED_QUESTION_COLUMNS = ["domanda", "risposta_attesa"] +REQUIRED_SET_COLUMNS = ["name", "id", "domanda", "risposta_attesa", "categoria"] +REQUIRED_RESULT_COLUMNS = ["id", "set_id", "timestamp", "results"] + + +def filter_new_rows(df: pd.DataFrame, existing_ids: Iterable[str]) -> Tuple[pd.DataFrame, int]: + """Ritorna le righe di ``df`` il cui ``id`` non è in ``existing_ids``. + + Parametri + --------- + df: + DataFrame contenente una colonna ``id``. + existing_ids: + Insieme di identificatori già presenti nel database. + + Restituisce + ----------- + Tuple[pd.DataFrame, int] + Il DataFrame filtrato con sole righe nuove e il conteggio delle nuove righe. 
+ """ + + if df is None or df.empty: + return df, 0 + + existing_set = {str(eid) for eid in existing_ids} + mask = ~df["id"].astype(str).isin(existing_set) + filtered = df[mask].copy() + return filtered, int(mask.sum()) + + +def read_questions(file) -> pd.DataFrame: + """Legge un file di domande (CSV o JSON) e restituisce un DataFrame normalizzato.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + elif file_extension == ".json": + try: + data = json.load(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file json non è valido") from e + if isinstance(data, list): + df = pd.DataFrame(data) + elif isinstance(data, dict) and isinstance(data.get("questions"), list): + df = pd.DataFrame(data["questions"]) + else: + raise ValueError( + "Il file JSON deve essere una lista di domande o contenere la chiave 'questions'." + ) + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. Caricare un file CSV o JSON.") + + if df is None or df.empty: + raise ValueError("Il file importato è vuoto o non contiene dati validi.") + + if "question" in df.columns and "domanda" not in df.columns: + df.rename(columns={"question": "domanda"}, inplace=True) + if "expected_answer" in df.columns and "risposta_attesa" not in df.columns: + df.rename(columns={"expected_answer": "risposta_attesa"}, inplace=True) + + if not all(col in df.columns for col in REQUIRED_QUESTION_COLUMNS): + raise ValueError( + f"Il file importato deve contenere le colonne '{REQUIRED_QUESTION_COLUMNS[0]}' e '{REQUIRED_QUESTION_COLUMNS[1]}'." 
+ ) + + if "id" not in df.columns: + df["id"] = [str(uuid.uuid4()) for _ in range(len(df))] + else: + df["id"] = df["id"].astype(str) + + if "categoria" not in df.columns: + df["categoria"] = "" + else: + df["categoria"] = df["categoria"].astype(str).fillna("") + + df["domanda"] = df["domanda"].astype(str).fillna("") + df["risposta_attesa"] = df["risposta_attesa"].astype(str).fillna("") + + return df[["id", "domanda", "risposta_attesa", "categoria"]] + + +def read_question_sets(file) -> List[Dict[str, Any]]: + """Legge un file di set di domande (CSV o JSON) e restituisce una lista di dizionari.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + + missing = [c for c in REQUIRED_SET_COLUMNS if c not in df.columns] + if missing: + raise ValueError( + "Il file CSV deve contenere le colonne " + ", ".join(REQUIRED_SET_COLUMNS) + ) + + sets_dict: Dict[str, List[Dict[str, str]]] = {} + for _, row in df.iterrows(): + name = str(row["name"]).strip() + if not name: + continue + question = { + "id": str(row["id"]).strip() if not pd.isna(row["id"]) else "", + "domanda": str(row["domanda"]).strip() + if not pd.isna(row["domanda"]) + else "", + "risposta_attesa": str(row["risposta_attesa"]).strip() + if not pd.isna(row["risposta_attesa"]) + else "", + "categoria": str(row["categoria"]).strip() + if not pd.isna(row["categoria"]) + else "", + } + sets_dict.setdefault(name, []).append(question) + return [{"name": n, "questions": qs} for n, qs in sets_dict.items()] + + elif file_extension == ".json": + try: + content = file.read() + if isinstance(content, bytes): + content = content.decode("utf-8") + data = json.loads(content) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file json non è valido") from e + + if not isinstance(data, list): + raise ValueError("Il formato del file json non è valido") + return data + + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. Caricare un file CSV o JSON.") + + +def read_test_results(file) -> pd.DataFrame: + """Legge un file di risultati di test (CSV o JSON) e restituisce un DataFrame normalizzato.""" + if hasattr(file, "seek"): + file.seek(0) + file_extension = os.path.splitext(file.name)[1].lower() + + if file_extension == ".csv": + try: + df = pd.read_csv(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file csv non è valido") from e + elif file_extension == ".json": + try: + data = json.load(file) + except Exception as e: # pragma: no cover - handled via ValueError + raise ValueError("Il formato del file json non è valido") from e + if isinstance(data, dict): + data = [data] + if not isinstance(data, list): + raise ValueError( + "Il file JSON deve contenere un oggetto o una lista di risultati." + ) + df = pd.DataFrame(data) + else: # pragma: no cover - supported formats only + raise ValueError("Formato file non supportato. 
Caricare un file CSV o JSON.") + + if df is None or df.empty: + raise ValueError("Il file importato è vuoto o non contiene dati validi.") + + if "id" not in df.columns: + df["id"] = [str(uuid.uuid4()) for _ in range(len(df))] + else: + df["id"] = df["id"].astype(str) + + if "set_id" not in df.columns: + df["set_id"] = "" + else: + df["set_id"] = df["set_id"].astype(str).fillna("") + + if "timestamp" not in df.columns: + df["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + else: + df["timestamp"] = df["timestamp"].astype(str).fillna( + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ) + + def _parse_results(value: Any) -> Dict: + if isinstance(value, str): + try: + return json.loads(value) + except Exception: # pragma: no cover - invalid json handled as empty dict + return {} + if isinstance(value, dict): + return value + return {} + + if "results" not in df.columns: + df["results"] = [{} for _ in range(len(df))] + else: + df["results"] = df["results"].apply(_parse_results) + + return df[["id", "set_id", "timestamp", "results"]] diff --git a/controllers/openai_client.py b/utils/openai_client.py similarity index 100% rename from controllers/openai_client.py rename to utils/openai_client.py diff --git a/utils/startup_utils.py b/utils/startup_utils.py new file mode 100644 index 0000000..5d95d7d --- /dev/null +++ b/utils/startup_utils.py @@ -0,0 +1,31 @@ +import logging +import os + +from models.database import DatabaseEngine +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + +logger = logging.getLogger(__name__) + + +def setup_logging(level: int = logging.INFO) -> None: + """Configura il logger radice con un formato di base.""" + logging.basicConfig( + level=level, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + +def initialize_database() -> None: + """Inizializza il database dell'applicazione.""" + DatabaseEngine.instance().init_db() + + +def load_default_config() -> dict: + """Restituisce la configurazione API di default.""" + return { + "api_key": os.environ.get("OPENAI_API_KEY", ""), + "endpoint": DEFAULT_ENDPOINT, + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000, + } diff --git a/views/__init__.py b/views/__init__.py index 06bdc3a..87130c9 100644 --- a/views/__init__.py +++ b/views/__init__.py @@ -1,4 +1,4 @@ -"""Views package.""" +"""Pacchetto delle viste.""" import logging logger = logging.getLogger(__name__) diff --git a/views/api_configurazione.py b/views/api_configurazione.py index c8d27a1..7dd5c24 100644 --- a/views/api_configurazione.py +++ b/views/api_configurazione.py @@ -11,7 +11,7 @@ validate_preset, test_api_connection, ) -from controllers.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT logger = logging.getLogger(__name__) diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py index 54837b7..d59924a 100644 --- a/views/esecuzione_test.py +++ b/views/esecuzione_test.py @@ -2,7 +2,7 @@ import streamlit as st -from controllers import execute_llm_test, load_sets, load_presets +from controllers import run_test, load_sets, load_presets, get_preset_by_name from views.style_utils import add_page_header, add_section_title logger = logging.getLogger(__name__) @@ -84,14 +84,7 @@ def render(): # --- Opzioni API basate su Preset --- add_section_title("Opzioni API basate su Preset", icon="🛠️") - preset_names_to_id = {preset['name']: preset['id'] for _, preset in st.session_state.api_presets.iterrows()} - preset_display_names = 
list(preset_names_to_id.keys()) - - def get_preset_config_by_name(name): - preset_id = preset_names_to_id.get(name) - if preset_id: - return st.session_state.api_presets[st.session_state.api_presets["id"] == preset_id].iloc[0].to_dict() - return None + preset_display_names = list(st.session_state.api_presets["name"]) # Seleziona preset per generazione risposta (comune a entrambe le modalità) generation_preset_name = st.selectbox( @@ -131,14 +124,20 @@ def get_preset_config_by_name(name): if st.session_state.run_llm_test: st.session_state.run_llm_test = False # Resetta lo stato - gen_preset_config = get_preset_config_by_name(st.session_state.selected_generation_preset_name) - eval_preset_config = get_preset_config_by_name(st.session_state.selected_evaluation_preset_name) + gen_preset_config = get_preset_by_name( + st.session_state.selected_generation_preset_name, + st.session_state.api_presets, + ) + eval_preset_config = get_preset_by_name( + st.session_state.selected_evaluation_preset_name, + st.session_state.api_presets, + ) if not gen_preset_config or not eval_preset_config: st.error("Assicurati di aver selezionato preset validi per generazione e valutazione.") else: with st.spinner("Generazione risposte e valutazione LLM in corso..."): - exec_result = execute_llm_test( + exec_result = run_test( selected_set_id, selected_set['name'], questions_in_set, diff --git a/views/gestione_domande.py b/views/gestione_domande.py index a51a37f..3eeb426 100644 --- a/views/gestione_domande.py +++ b/views/gestione_domande.py @@ -5,11 +5,11 @@ from controllers import ( add_question, - update_question, - delete_question, - filter_questions_by_category, + get_filtered_questions, load_questions, - import_questions_from_file, + save_question_action, + delete_question_action, + import_questions_action, ) from views.style_utils import add_page_header from views.state_models import QuestionPageState @@ -19,65 +19,58 @@ # === FUNZIONI DI CALLBACK === -def save_question_action( - question_id, edited_question, edited_answer, edited_category -) -> QuestionPageState: - """Salva le modifiche alla domanda e restituisce lo stato dell'operazione.""" - state = QuestionPageState() - if update_question( - question_id, - domanda=edited_question, - risposta_attesa=edited_answer, - categoria=edited_category, - ): - state.save_success = True - st.session_state.questions = load_questions() - state.trigger_rerun = True - else: - state.save_error = True - return state - - def create_save_question_callback( question_id, edited_question, edited_answer, edited_category ): def callback(): - st.session_state.question_page_state = save_question_action( - question_id, edited_question, edited_answer, edited_category - ) + state = QuestionPageState() + try: + result = save_question_action( + question_id, edited_question, edited_answer, edited_category + ) + if result["success"]: + state.save_success = True + state.save_success_message = "Domanda salvata." + st.session_state.questions = result["questions_df"] + state.trigger_rerun = True + else: + state.save_error = True + state.save_error_message = "Domanda non salvata." 
+ except Exception as e: + state.save_error = True + state.save_error_message = f"Domanda non salvata: {e}" + st.session_state.question_page_state = state return callback -def delete_question_action(question_id) -> QuestionPageState: - """Elimina la domanda e restituisce lo stato dell'operazione.""" +def import_questions_callback(): + uploaded_file = st.session_state.get("uploaded_file_content") state = QuestionPageState() - delete_question(question_id) - state.delete_success = True - st.session_state.questions = load_questions() - state.trigger_rerun = True - return state - + try: + result = import_questions_action(uploaded_file) + st.session_state.questions = result["questions_df"] + count = result.get("imported_count", 0) + warnings = result.get("warnings", []) -def import_questions_action(uploaded_file) -> QuestionPageState: - """Importa le domande da file e restituisce lo stato dell'operazione.""" - state = QuestionPageState() - if uploaded_file is not None: - success, message = import_questions_from_file(uploaded_file) - if success: + if count > 0: state.import_success = True - state.import_success_message = message - st.session_state.questions = load_questions() - state.trigger_rerun = True + msg = f"Importate con successo {count} domande." + if warnings: + msg = f"{msg} Avvisi: {'; '.join(warnings)}" + state.import_success_message = msg else: state.import_error = True - state.import_error_message = message - return state + msg = "Nessuna domanda importata." + if warnings: + msg = f"{msg} {'; '.join(warnings)}" + state.import_error_message = msg - -def import_questions_callback(): - uploaded_file = st.session_state.get("uploaded_file_content") - st.session_state.question_page_state = import_questions_action(uploaded_file) + state.trigger_rerun = True + except Exception as e: + state.import_error = True + state.import_error_message = str(e) + st.session_state.question_page_state = state st.session_state.upload_questions_file = None st.session_state.uploaded_file_content = None @@ -95,7 +88,16 @@ def confirm_delete_question_dialog(question_id, question_text): with col1: if st.button("Sì, Elimina", type="primary", use_container_width=True): - st.session_state.question_page_state = delete_question_action(question_id) + state = QuestionPageState() + try: + questions = delete_question_action(question_id) + state.delete_success = True + st.session_state.questions = questions + state.trigger_rerun = True + except Exception as e: + state.save_error = True + state.save_error_message = str(e) + st.session_state.question_page_state = state st.rerun() with col2: @@ -108,8 +110,9 @@ def render(): st.session_state.setdefault("question_page_state", QuestionPageState()) state: QuestionPageState = st.session_state.question_page_state - # Carica le domande utilizzando la cache - st.session_state.questions = load_questions() + # Carica le domande utilizzando la cache solo se non già presenti + if "questions" not in st.session_state: + st.session_state.questions = load_questions() # Gestisce la logica di rerun if state.trigger_rerun: @@ -148,8 +151,8 @@ def render(): st.header("Visualizza e Modifica Domande") if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df, unique_categories = filter_questions_by_category() - category_options = ["Tutte le categorie"] + unique_categories + _, categories = get_filtered_questions() + category_options = ["Tutte le categorie"] + categories selected_category = st.selectbox( "Filtra per categoria:", @@ -157,10 +160,8 @@ def render(): 
index=0 ) - if selected_category == "Tutte le categorie": - filtered_questions_df = questions_df - else: - filtered_questions_df, _ = filter_questions_by_category(selected_category) + filter_cat = None if selected_category == "Tutte le categorie" else selected_category + filtered_questions_df, _ = get_filtered_questions(filter_cat) if not filtered_questions_df.empty: for idx, row in filtered_questions_df.iterrows(): diff --git a/views/gestione_set.py b/views/gestione_set.py index 544640b..ebd6b69 100644 --- a/views/gestione_set.py +++ b/views/gestione_set.py @@ -1,13 +1,15 @@ import logging import streamlit as st -from controllers import create_set, load_sets, load_questions +from controllers import ( + create_set, + load_sets, + prepare_sets_for_view, +) from views.style_utils import add_page_header, add_global_styles from views.state_models import SetPageState from views.set_helpers import ( confirm_delete_set_dialog, import_set_callback, - get_question_text, - get_question_category, mark_expander_open, create_save_set_callback, ) @@ -53,11 +55,10 @@ def render(): st.error(state.import_set_error_message) state.import_set_error = False - # Inizializza i dati utilizzando la cache - if 'questions' not in st.session_state: - st.session_state.questions = load_questions() - if 'question_sets' not in st.session_state: - st.session_state.question_sets = load_sets() + # Inizializza i dati tramite il controller + initial_data = prepare_sets_for_view() + st.session_state.questions = initial_data["questions_df"] + st.session_state.question_sets = initial_data["raw_sets_df"] # Assicurati che esista lo stato degli expander per ogni set if 'question_sets' in st.session_state and not st.session_state.question_sets.empty: @@ -70,14 +71,6 @@ def render(): for sid in current_set_ids: st.session_state.set_expanders.setdefault(sid, False) - # Assicurati che la colonna 'categoria' esista in questions_df e gestisci i NaN - if 'questions' in st.session_state and not st.session_state.questions.empty: - questions_df_temp = st.session_state.questions - if 'categoria' not in questions_df_temp.columns: - questions_df_temp['categoria'] = 'N/A' # Aggiungi colonna se mancante - questions_df_temp['categoria'] = questions_df_temp['categoria'].fillna('N/A') # Riempi NaN - st.session_state.questions = questions_df_temp - # Aggiungi un'intestazione stilizzata add_page_header( "Gestione Set di Domande", @@ -92,76 +85,36 @@ def render(): with tabs[0]: st.header("Visualizza e Modifica Set di Domande") - questions_ready = ('questions' in st.session_state and - not st.session_state.questions.empty and - 'domanda' in st.session_state.questions.columns and - 'categoria' in st.session_state.questions.columns) - sets_ready = 'question_sets' in st.session_state - - if not questions_ready: - st.warning( - "Dati delle domande (incluse categorie) non completamente caricati. " - "Alcune funzionalità potrebbero essere limitate. Vai a 'Gestione Domande'." - ) - # Impedisci l'esecuzione del filtro se i dati delle domande non sono pronti - unique_categories_for_filter = [] - selected_categories = [] - else: - questions_df = st.session_state.questions - # Ottieni categorie uniche per il filtro, escludendo 'N/A' - # se si preferisce non mostrarlo come opzione selezionabile - # o gestendolo specificamente. Per ora, includiamo tutto. 
- unique_categories_for_filter = sorted( - list(questions_df['categoria'].astype(str).unique()) - ) - if not unique_categories_for_filter: - st.info( - "Nessuna categoria definita nelle domande esistenti per poter filtrare." - ) - - selected_categories = st.multiselect( - "Filtra per categorie (mostra i set che contengono almeno una domanda da " - "OGNI categoria selezionata):", - options=unique_categories_for_filter, - default=[], - key="filter_categories", - ) - - if sets_ready and not st.session_state.question_sets.empty: - question_sets_df = st.session_state.question_sets - display_sets_df = question_sets_df.copy() # Inizia con tutti i set - - if selected_categories and questions_ready: # Applica il filtro solo se categorie selezionate e dati pronti - filtered_set_indices = [] - for idx, set_row in question_sets_df.iterrows(): - question_ids_in_set = set_row.get('questions', []) - if not isinstance(question_ids_in_set, list): - question_ids_in_set = [] - - if not question_ids_in_set: # Se il set non ha domande, non può soddisfare il filtro - continue - - categories_present_in_set = set() - for q_id in question_ids_in_set: - category = get_question_category(str(q_id), questions_df) - categories_present_in_set.add(category) + categories = initial_data["categories"] + selected_categories = st.multiselect( + "Filtra per categorie (mostra i set che contengono almeno una domanda da OGNI categoria selezionata):", + options=categories, + default=[], + key="filter_categories", + ) - # Verifica se il set contiene almeno una domanda da OGNI categoria selezionata - if all(sel_cat in categories_present_in_set for sel_cat in selected_categories): - filtered_set_indices.append(idx) + data = prepare_sets_for_view(selected_categories) + questions_df = data["questions_df"] + display_sets_df = data["sets_df"] + st.session_state.questions = questions_df - display_sets_df = question_sets_df.loc[filtered_set_indices] + questions_ready = ( + not questions_df.empty + and 'domanda' in questions_df.columns + and 'categoria' in questions_df.columns + ) - if display_sets_df.empty and selected_categories: + if display_sets_df.empty: + if selected_categories: st.info( "Nessun set trovato che contenga domande da tutte le categorie selezionate: " f"{', '.join(selected_categories)}." ) - elif display_sets_df.empty and not selected_categories: + else: st.info( "Nessun set di domande disponibile. Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'." 
) - + else: for idx, row in display_sets_df.iterrows(): exp_key = f"set_expander_{row['id']}" if exp_key not in st.session_state.set_expanders: @@ -190,24 +143,24 @@ def render(): if row['id'] not in st.session_state.question_checkboxes: st.session_state.question_checkboxes[row['id']] = {} - if current_question_ids_in_set: - for q_id in current_question_ids_in_set: - q_text = get_question_text(str(q_id)) - q_cat = get_question_category(str(q_id), questions_df) if questions_ready else 'N/A' + questions_detail = row.get('questions_detail', []) + if questions_detail: + for q in questions_detail: + q_id = str(q.get('id')) + q_text = q.get('domanda', f"ID Domanda: {q_id} (non trovata)") + q_cat = q.get('categoria', 'N/A') display_text = f"{q_text} (Categoria: {q_cat})" - # 使用回调来更新checkbox状态 checkbox_value = st.checkbox( display_text, value=True, key=f"qcheck_{row['id']}_{q_id}", on_change=mark_expander_open, - args=(exp_key,) + args=(exp_key,), ) - st.session_state.question_checkboxes[row['id']][str(q_id)] = checkbox_value + st.session_state.question_checkboxes[row['id']][q_id] = checkbox_value else: st.info("Nessuna domanda in questo set.") - st.subheader("Aggiungi Domande al Set") # 初始化新选择的问题状态 @@ -224,10 +177,11 @@ def render(): if not available_questions_df.empty: question_dict_for_multiselect = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, questions_df)})" - for q_id, q_text in zip( + q_id: f"{q_text} (Cat: {q_cat})" + for q_id, q_text, q_cat in zip( available_questions_df['id'].astype(str), available_questions_df['domanda'], + available_questions_df['categoria'], ) } newly_selected_questions_ids = st.multiselect( @@ -262,9 +216,6 @@ def render(): # Lo stato dell'expander viene aggiornato tramite i callback - elif not sets_ready or (st.session_state.question_sets.empty and not selected_categories): - st.info("Nessun set di domande disponibile. 
Crea un nuovo set utilizzando la scheda 'Crea Nuovo Set'.") - # Scheda Crea Nuovo Set with tabs[1]: st.header("Crea Nuovo Set di Domande") @@ -283,10 +234,11 @@ def render(): if questions_ready_for_creation: all_questions_df_creation = st.session_state.questions question_dict_for_creation = { - q_id: f"{q_text} (Cat: {get_question_category(q_id, all_questions_df_creation)})" - for q_id, q_text in zip( + q_id: f"{q_text} (Cat: {q_cat})" + for q_id, q_text, q_cat in zip( all_questions_df_creation['id'].astype(str), all_questions_df_creation['domanda'], + all_questions_df_creation['categoria'], ) } diff --git a/views/session_state.py b/views/session_state.py index 8d9d84d..cbd6f4e 100644 --- a/views/session_state.py +++ b/views/session_state.py @@ -2,7 +2,7 @@ import streamlit as st -from controllers import get_initial_state +from controllers.startup_controller import get_initial_state logger = logging.getLogger(__name__) diff --git a/views/set_helpers.py b/views/set_helpers.py index 89b96b1..50d3333 100644 --- a/views/set_helpers.py +++ b/views/set_helpers.py @@ -2,13 +2,8 @@ import streamlit as st -from controllers import ( - update_set, - delete_set, - refresh_question_sets, - import_sets_from_file, - refresh_questions, -) +from controllers import update_set, delete_set +from models.question_set import QuestionSet from .state_models import SetPageState logger = logging.getLogger(__name__) @@ -25,18 +20,50 @@ def save_set_callback( set(kept_questions_ids + [str(q_id) for q_id in newly_selected_questions_ids]) ) - update_set(set_id, edited_name, updated_questions_ids) - state.save_set_success_message = "Set di domande aggiornato con successo!" - state.save_set_success = True - st.session_state.question_sets = refresh_question_sets() + try: + result = update_set(set_id, edited_name, updated_questions_ids) + if isinstance(result, tuple): + sets_df = result[0] + message = result[1] if len(result) > 1 else "Set di domande aggiornato con successo!" + if len(result) > 2 and isinstance(result[2], list): + for warn in result[2]: + st.warning(warn) + else: + sets_df = result + message = "Set di domande aggiornato con successo!" + + state.save_set_success_message = message + state.save_set_success = True + if sets_df is not None: + st.session_state.question_sets = sets_df + except Exception as exc: # pragma: no cover - UI error handling + state.save_set_error = True + state.save_set_error_message = str(exc) + state.trigger_rerun = True def delete_set_callback(set_id: str, state: SetPageState): - delete_set(set_id) - state.delete_set_success_message = "Set di domande eliminato con successo!" - state.delete_set_success = True - st.session_state.question_sets = refresh_question_sets() + try: + result = delete_set(set_id) + if isinstance(result, tuple): + sets_df = result[0] + message = result[1] if len(result) > 1 else "Set di domande eliminato con successo!" + if len(result) > 2 and isinstance(result[2], list): + for warn in result[2]: + st.warning(warn) + else: + sets_df = result + message = "Set di domande eliminato con successo!" 
+ + state.delete_set_success_message = message + state.delete_set_success = True + if sets_df is not None: + st.session_state.question_sets = sets_df + except Exception as exc: # pragma: no cover - UI error handling + state.save_set_error = True + state.save_set_error_message = str(exc) + state.trigger_rerun = True @@ -67,48 +94,44 @@ def import_set_callback(state: SetPageState): state.import_set_error_message = "" uploaded_file = st.session_state.get("uploaded_file_content_set") - result = import_sets_from_file(uploaded_file) + try: + result = QuestionSet.import_from_file(uploaded_file) + + parts: list[str] = [] + if result.sets_imported_count > 0: + parts.append(f"{result.sets_imported_count} set importati") + if result.new_questions_added_count > 0: + parts.append(f"{result.new_questions_added_count} nuove domande aggiunte") + if result.existing_questions_found_count > 0: + parts.append( + f"{result.existing_questions_found_count} domande esistenti referenziate" + ) + + if parts: + message = ". ".join(parts) + "." + else: + message = "Nessun set importato." + if result.warnings: + message += " Controlla gli avvisi." - if result["success"]: state.import_set_success = True - state.import_set_success_message = result["success_message"] - st.session_state.questions = refresh_questions() - st.session_state.question_sets = refresh_question_sets() - else: + state.import_set_success_message = message + + if result.questions_df is not None: + st.session_state.questions = result.questions_df + if result.sets_df is not None: + st.session_state.question_sets = result.sets_df + for warn in result.warnings: + st.warning(warn) + except Exception as exc: # pragma: no cover - UI error handling state.import_set_error = True - state.import_set_error_message = result["error_message"] - - for warn in result.get("warnings", []): - st.warning(warn) + state.import_set_error_message = str(exc) st.session_state.uploaded_file_content_set = None st.session_state.pop("upload_set_file", None) state.trigger_rerun = True -def get_question_text(question_id: str) -> str: - """Ritorna il testo della domanda dato il suo ID.""" - if "questions" in st.session_state and not st.session_state.questions.empty: - if "domanda" not in st.session_state.questions.columns: - st.session_state.questions = refresh_questions() - if "domanda" not in st.session_state.questions.columns: - return f"ID Domanda: {question_id} (colonna 'domanda' mancante)" - - question_row = st.session_state.questions[st.session_state.questions["id"] == str(question_id)] - if not question_row.empty: - return question_row.iloc[0]["domanda"] - return f"ID Domanda: {question_id} (non trovata o dati non caricati)" - - -def get_question_category(question_id: str, questions_df): - """Ritorna la categoria di una domanda dato il suo ID.""" - if questions_df is not None and not questions_df.empty and "categoria" in questions_df.columns: - question_row = questions_df[questions_df["id"] == str(question_id)] - if not question_row.empty: - return question_row.iloc[0]["categoria"] - return "N/A" - - def mark_expander_open(exp_key: str): """Segna l'expander come aperto nello stato di sessione""" if "set_expanders" in st.session_state: diff --git a/views/state_models.py b/views/state_models.py index 9c5d020..afec55e 100644 --- a/views/state_models.py +++ b/views/state_models.py @@ -6,7 +6,7 @@ @dataclass class SetPageState: - """Transient UI state for the question set management page.""" + """Stato UI temporaneo per la pagina di gestione dei set di domande.""" 
save_set_success: bool = False save_set_success_message: str = "Set aggiornato con successo!" @@ -29,7 +29,7 @@ class SetPageState: @dataclass class QuestionPageState: - """Transient UI state for the question management page.""" + """Stato UI temporaneo per la pagina di gestione delle domande.""" save_success: bool = False save_success_message: str = "Domanda aggiornata con successo!" diff --git a/views/visualizza_risultati.py b/views/visualizza_risultati.py index d559a3e..344b8fe 100644 --- a/views/visualizza_risultati.py +++ b/views/visualizza_risultati.py @@ -7,12 +7,13 @@ import plotly.graph_objects as go from controllers import ( - import_results_from_file, - load_results, - refresh_results, + import_results_action, calculate_statistics, load_sets, - load_presets, + get_results, + list_set_names, + list_model_names, + prepare_select_options, ) from views.style_utils import add_page_header, add_section_title logger = logging.getLogger(__name__) @@ -27,7 +28,7 @@ def render(): # Carica i risultati utilizzando la cache if 'results' not in st.session_state: - st.session_state.results = load_results() + st.session_state.results = get_results(None, None) if st.session_state.results.empty: st.warning("Nessun risultato di test disponibile. Esegui prima alcuni test dalla pagina 'Esecuzione Test'.") st.stop() @@ -36,10 +37,6 @@ def render(): if 'question_sets' not in st.session_state: st.session_state.question_sets = load_sets() - # Carica i preset API utilizzando la cache - if 'api_presets' not in st.session_state: - st.session_state.api_presets = load_presets() - # Stato per messaggi di importazione risultati if 'import_results_success' not in st.session_state: st.session_state.import_results_success = False @@ -55,41 +52,30 @@ def render(): st.error(st.session_state.import_results_message) st.session_state.import_results_error = False - def get_set_name(set_id): - if not st.session_state.question_sets.empty: - set_info = st.session_state.question_sets[st.session_state.question_sets['id'] == str(set_id)] - if not set_info.empty: - return set_info.iloc[0]['name'] - return "Set Sconosciuto" - - def get_model_from_preset_name(preset_name): - """Restituisce il modello associato a un preset, se disponibile.""" - if 'api_presets' in st.session_state and not st.session_state.api_presets.empty: - preset_row = st.session_state.api_presets[st.session_state.api_presets['name'] == str(preset_name)] - if not preset_row.empty: - return preset_row.iloc[0]['model'] - return "Sconosciuto" - def import_results_callback(): """Callback per importare risultati da file JSON.""" - if 'uploaded_results_file' in st.session_state and st.session_state.uploaded_results_file is not None: - success, message = import_results_from_file(st.session_state.uploaded_results_file) - st.session_state.import_results_message = message - st.session_state.import_results_success = success - st.session_state.import_results_error = not success - if success: - st.session_state.results = refresh_results() + if ( + 'uploaded_results_file' in st.session_state + and st.session_state.uploaded_results_file is not None + ): + try: + results_df, message = import_results_action( + st.session_state.uploaded_results_file + ) + st.session_state.import_results_message = message + st.session_state.import_results_success = True + st.session_state.import_results_error = False + st.session_state.results = results_df + except Exception as exc: # noqa: BLE001 + st.session_state.import_results_message = str(exc) + st.session_state.import_results_success = 
False + st.session_state.import_results_error = True st.session_state.uploaded_results_file = None st.session_state.upload_results = None # Filtri per Set e Modello LLM - all_set_names = sorted({get_set_name(r['set_id']) for _, r in st.session_state.results.iterrows()}) - - all_model_names = sorted({ - r['results']['generation_llm'] - for _, r in st.session_state.results.iterrows() - if r['results'].get('generation_llm') - }) + all_set_names = list_set_names(st.session_state.results, st.session_state.question_sets) + all_model_names = list_model_names(st.session_state.results) selected_set_filter = st.selectbox( "Filtra per Set", @@ -105,47 +91,14 @@ def import_results_callback(): key="filter_model_name" ) - filtered_results_df = st.session_state.results - if selected_set_filter != "Tutti": - set_ids = st.session_state.question_sets[ - st.session_state.question_sets['name'] == selected_set_filter - ]['id'].astype(str) - filtered_results_df = filtered_results_df[ - filtered_results_df['set_id'].astype(str).isin(set_ids) - ] - - if selected_model_filter != "Tutti": - filtered_results_df = filtered_results_df[ - filtered_results_df['results'].apply( - lambda res: res.get('generation_llm') == selected_model_filter - ) - ] - - # Elabora i risultati per la visualizzazione nel selectbox - processed_results_for_select = [] - for _, row in filtered_results_df.iterrows(): - result_data = row['results'] # Questo è il dizionario che contiene tutti i dettagli - set_name = get_set_name(row['set_id']) - avg_score = result_data.get('avg_score', 0) - method = result_data.get('method', 'N/A') - method_icon = "🤖" if method == "LLM" else "📊" - - processed_results_for_select.append( - { - 'id': row['id'], - 'display_name': ( - f"{row['timestamp']} - {method_icon} {set_name} " - f"(Avg: {avg_score:.2f}%) - {method}" - ), - } - ) - - processed_results_for_select.sort( - key=lambda x: x['display_name'].split(' - ')[0], - reverse=True, - ) # Ordina per timestamp + filtered_results_df = get_results( + None if selected_set_filter == "Tutti" else selected_set_filter, + None if selected_model_filter == "Tutti" else selected_model_filter, + ) - result_options = {r['id']: r['display_name'] for r in processed_results_for_select} + result_options = prepare_select_options( + filtered_results_df, st.session_state.question_sets + ) # Seleziona il risultato da visualizzare selected_result_id = st.selectbox( @@ -171,9 +124,15 @@ def import_results_callback(): st.stop() # Ottieni i dati del risultato selezionato - selected_result_row = st.session_state.results[st.session_state.results['id'] == selected_result_id].iloc[0] + selected_result_row = filtered_results_df[ + filtered_results_df['id'] == selected_result_id + ].iloc[0] result_data = selected_result_row['results'] - set_name = get_set_name(selected_result_row['set_id']) + set_name_map = { + str(row['id']): row['name'] + for row in st.session_state.question_sets.to_dict('records') + } + set_name = set_name_map.get(str(selected_result_row['set_id']), 'Set Sconosciuto') questions_results = result_data.get('questions', {}) with st.expander("Esporta/Importa Risultati"): @@ -215,7 +174,9 @@ def import_results_callback(): compare_result_data = None compare_questions_results = {} if compare_result_id: - compare_result_row = st.session_state.results[st.session_state.results['id'] == compare_result_id].iloc[0] + compare_result_row = filtered_results_df[ + filtered_results_df['id'] == compare_result_id + ].iloc[0] compare_result_data = compare_result_row['results'] 
compare_questions_results = compare_result_data.get('questions', {}) From f4f3b0aae0f12ce60fb5810ace00518116f3282c Mon Sep 17 00:00:00 2001 From: oniichan Date: Mon, 11 Aug 2025 14:09:44 +0200 Subject: [PATCH 19/41] modified requirements --- requirements-dev.txt | 2 ++ requirements.txt | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 26c4295..1b5720c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,4 @@ flake8 mypy +pytest>=7.0 +pytest-cov>=4.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 160bebf..b96b500 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,6 @@ sqlalchemy>=2.0.0 pymysql>=1.0.0 cryptography>=42.0.0 # Dipendenze per i test -pytest>=7.0 -pytest-cov>=4.0 # Nota: uuid e configparser sono moduli integrati in Python # installa con pip install -r requirements.txt From 4a24fe681b1abf10ee43195403a4badcf9d4a701 Mon Sep 17 00:00:00 2001 From: oniichan Date: Tue, 12 Aug 2025 00:25:48 +0200 Subject: [PATCH 20/41] added all test unit --- controllers/question_controller.py | 4 +- controllers/question_set_controller.py | 16 +- controllers/startup_controller.py | 9 +- controllers/test_controller.py | 18 ++- models/cached_data.py | 10 +- models/database.py | 33 +++-- models/orm_models.py | 46 +++--- models/question.py | 8 +- models/question_set.py | 4 +- models/test_result.py | 23 +-- tests/conftest.py | 25 ++++ tests/test_api_configurazione_view.py | 148 +++++++++++++++++++ tests/test_api_preset_controller.py | 101 +++++++------ tests/test_api_preset_model.py | 133 +++++++++++++++++ tests/test_app.py | 68 +++++++++ tests/test_cache_utils.py | 170 +++++++++++++++++++++ tests/test_component_utils_view.py | 39 +++++ tests/test_esecuzione_test_view.py | 38 +++++ tests/test_evaluate_answer.py | 43 +++--- tests/test_file_reader_utils.py | 90 ++++++++++++ tests/test_gestione_domande_view.py | 183 +++++++++++++++++++++++ tests/test_gestione_set_view.py | 187 ++++++++++++++++++++++++ tests/test_home_view.py | 47 ++++++ tests/test_import_results.py | 14 +- tests/test_initialize_db.py | 32 ++++ tests/test_models_cached_data.py | 49 +++++++ tests/test_models_database.py | 49 +++++++ tests/test_models_orm.py | 92 ++++++++++++ tests/test_models_question.py | 33 +++++ tests/test_models_question_set.py | 43 ++++++ tests/test_models_test_result.py | 35 +++++ tests/test_openai_client.py | 60 ++++++++ tests/test_openai_controllers.py | 51 ++++--- tests/test_question_controller.py | 117 ++++++++------- tests/test_question_import.py | 5 +- tests/test_question_set_controller.py | 42 ++++-- tests/test_question_set_import.py | 28 ++-- tests/test_question_set_importer.py | 13 +- tests/test_result_controller.py | 92 ++++++++++++ tests/test_session_state.py | 28 ++++ tests/test_set_helpers.py | 134 +++++++++++++---- tests/test_startup_controller.py | 54 +++++++ tests/test_startup_utils.py | 39 +++++ tests/test_state_models.py | 21 +++ tests/test_style_utils.py | 56 +++++++ tests/test_test_controller.py | 93 ++++++++++++ tests/test_ui_utils.py | 21 +++ tests/test_visualizza_risultati_view.py | 150 +++++++++++++++++++ utils/file_reader_utils.py | 8 +- utils/openai_client.py | 49 +++++-- utils/startup_utils.py | 27 +++- views/api_configurazione.py | 2 +- views/component_utils.py | 3 +- views/set_helpers.py | 9 +- 54 files changed, 2569 insertions(+), 323 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_api_configurazione_view.py create mode 100644 
tests/test_api_preset_model.py create mode 100644 tests/test_app.py create mode 100644 tests/test_cache_utils.py create mode 100644 tests/test_component_utils_view.py create mode 100644 tests/test_esecuzione_test_view.py create mode 100644 tests/test_file_reader_utils.py create mode 100644 tests/test_gestione_domande_view.py create mode 100644 tests/test_gestione_set_view.py create mode 100644 tests/test_home_view.py create mode 100644 tests/test_initialize_db.py create mode 100644 tests/test_models_cached_data.py create mode 100644 tests/test_models_database.py create mode 100644 tests/test_models_orm.py create mode 100644 tests/test_models_question.py create mode 100644 tests/test_models_question_set.py create mode 100644 tests/test_models_test_result.py create mode 100644 tests/test_openai_client.py create mode 100644 tests/test_result_controller.py create mode 100644 tests/test_session_state.py create mode 100644 tests/test_startup_controller.py create mode 100644 tests/test_startup_utils.py create mode 100644 tests/test_state_models.py create mode 100644 tests/test_style_utils.py create mode 100644 tests/test_test_controller.py create mode 100644 tests/test_ui_utils.py create mode 100644 tests/test_visualizza_risultati_view.py diff --git a/controllers/question_controller.py b/controllers/question_controller.py index 6812c64..e83f471 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,7 +1,7 @@ """Controller per la gestione delle domande senza layer di service.""" import logging -from typing import Optional, Tuple, List, Dict, Any +from typing import IO, Optional, Tuple, List, Dict, Any import pandas as pd @@ -106,7 +106,7 @@ def delete_question_action(question_id: str) -> pd.DataFrame: return questions -def import_questions_action(uploaded_file) -> Dict[str, Any]: +def import_questions_action(uploaded_file: IO[str] | IO[bytes]) -> Dict[str, Any]: """Importa domande da file e restituisce i risultati dell'operazione. Parametri diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 2a96173..01a2797 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -36,16 +36,20 @@ def update_set( set_id: str, name: Optional[str] = None, question_ids: Optional[List[str]] = None, -) -> None: - """Aggiorna un set di domande esistente e ricarica la cache.""" +) -> pd.DataFrame: + """Aggiorna un set di domande esistente e ricarica la cache. + + Restituisce il DataFrame aggiornato dei set di domande.""" QuestionSet.update(set_id, name, question_ids) - refresh_question_sets() + return refresh_question_sets() -def delete_set(set_id: str) -> None: - """Elimina un set di domande e aggiorna la cache.""" +def delete_set(set_id: str) -> pd.DataFrame: + """Elimina un set di domande e aggiorna la cache. 
+ + Restituisce il DataFrame aggiornato dei set di domande.""" QuestionSet.delete(set_id) - refresh_question_sets() + return refresh_question_sets() def prepare_sets_for_view( diff --git a/controllers/startup_controller.py b/controllers/startup_controller.py index 5d98551..9908827 100644 --- a/controllers/startup_controller.py +++ b/controllers/startup_controller.py @@ -2,19 +2,20 @@ from utils.cache import get_questions, get_question_sets, get_results from utils.startup_utils import ( - setup_logging, + DefaultConfig, initialize_database, load_default_config, + setup_logging, ) logger = logging.getLogger(__name__) -def get_initial_state() -> dict: +def get_initial_state() -> dict[str, object]: """Restituisce lo stato predefinito dell'applicazione.""" initialize_database() - defaults = load_default_config() - cached_data = { + defaults: DefaultConfig = load_default_config() + cached_data: dict[str, object] = { "questions": get_questions(), "question_sets": get_question_sets(), "results": get_results(), diff --git a/controllers/test_controller.py b/controllers/test_controller.py index b2a098c..4a99bdf 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -5,7 +5,7 @@ import json import logging from datetime import datetime -from typing import Dict, List, Tuple, Any +from typing import Any, Dict, IO, List, Tuple import pandas as pd from openai import APIConnectionError, APIStatusError, RateLimitError @@ -29,7 +29,9 @@ def refresh_results() -> pd.DataFrame: return TestResult.refresh_cache() -def import_results_action(uploaded_file) -> Tuple[pd.DataFrame, str]: +def import_results_action( + uploaded_file: IO[str] | IO[bytes], +) -> Tuple[pd.DataFrame, str]: """Importa risultati da ``uploaded_file`` e restituisce il DataFrame aggiornato. Parametri @@ -65,8 +67,9 @@ def generate_answer(question: str, client_config: Dict[str, Any]) -> str: un'eccezione. """ + api_key = str(client_config.get("api_key", "")) client = openai_client.get_openai_client( - api_key=client_config.get("api_key"), + api_key=api_key, base_url=client_config.get("endpoint"), ) if not client: @@ -115,8 +118,9 @@ def evaluate_answer( un'eccezione in caso di errore. 
""" + api_key = str(client_config.get("api_key", "")) client = openai_client.get_openai_client( - api_key=client_config.get("api_key"), + api_key=api_key, base_url=client_config.get("endpoint"), ) if not client: @@ -195,9 +199,9 @@ def run_test( set_id: str, set_name: str, question_ids: List[str], - gen_preset_config: Dict, - eval_preset_config: Dict, -) -> Dict: + gen_preset_config: dict[str, Any], + eval_preset_config: dict[str, Any], +) -> dict[str, Any]: """Esegue un test generando e valutando risposte con LLM.""" try: diff --git a/models/cached_data.py b/models/cached_data.py index 1f5430d..869a950 100644 --- a/models/cached_data.py +++ b/models/cached_data.py @@ -1,4 +1,5 @@ import logging +from typing import List from models.api_preset import APIPreset from models.question import Question @@ -6,18 +7,17 @@ from models.test_result import TestResult logger = logging.getLogger(__name__) - -def get_questions(): +def get_questions() -> List[Question]: return Question.load_all() -def get_question_sets(): +def get_question_sets() -> List[QuestionSet]: return QuestionSet.load_all() -def get_api_presets(): +def get_api_presets() -> List[APIPreset]: return APIPreset.load_all() -def get_results(): +def get_results() -> List[TestResult]: return TestResult.load_all() diff --git a/models/database.py b/models/database.py index 492dc55..84583d7 100644 --- a/models/database.py +++ b/models/database.py @@ -2,8 +2,11 @@ import threading import configparser from pathlib import Path +from typing import Mapping, Optional + from sqlalchemy import create_engine, text -from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker, DeclarativeBase logger = logging.getLogger(__name__) @@ -15,8 +18,8 @@ class DatabaseEngine: _instance_lock = threading.Lock() def __init__(self) -> None: - self._engine = None - self._session_factory = None + self._engine: Optional[Engine] = None + self._session_factory: Optional[sessionmaker] = None self._engine_lock = threading.Lock() self._session_lock = threading.Lock() @@ -28,7 +31,7 @@ def instance(cls) -> "DatabaseEngine": cls._instance = cls() return cls._instance - def _load_config(self): + def _load_config(self) -> Mapping[str, str]: config = configparser.ConfigParser() root = Path(__file__).resolve().parent.parent cfg_path = root / "db.config" @@ -37,7 +40,7 @@ def _load_config(self): config.read(cfg_path) return config["mysql"] - def _ensure_database(self, cfg): + def _ensure_database(self, cfg: Mapping[str, str]) -> None: """Crea il database di destinazione se non esiste già.""" root_url = ( f"mysql+pymysql://{cfg['user']}:{cfg['password']}@{cfg['host']}:{cfg.get('port', 3306)}" @@ -54,11 +57,15 @@ def _ensure_database(self, cfg): cfg.get("user"), ) raise RuntimeError( - f"Impossibile creare il database '{cfg.get('database')}' sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. " - "Il server del database potrebbe non essere raggiungibile, le credenziali potrebbero essere errate o l'utente potrebbe non avere privilegi sufficienti." + ( + f"Impossibile creare il database '{cfg.get('database')}' " + f"sull'host '{cfg.get('host')}' per l'utente '{cfg.get('user')}'. " + "Il server del database potrebbe non essere raggiungibile, le credenziali potrebbero essere errate " + "o l'utente potrebbe non avere privilegi sufficienti." 
+ ) ) from exc - def get_engine(self): + def get_engine(self) -> Engine: if self._engine is None: with self._engine_lock: if self._engine is None: @@ -72,21 +79,25 @@ def get_engine(self): pool_pre_ping=True, pool_recycle=3600, ) + assert self._engine is not None return self._engine - def get_session(self): + def get_session(self) -> Session: if self._session_factory is None: with self._session_lock: if self._session_factory is None: engine = self.get_engine() self._session_factory = sessionmaker(bind=engine) + assert self._session_factory is not None return self._session_factory() - def init_db(self): + def init_db(self) -> None: engine = self.get_engine() import models.orm_models # noqa: F401 Base.metadata.create_all(engine) -Base = declarative_base() +class Base(DeclarativeBase): + """Base class per i modelli dichiarativi SQLAlchemy.""" + pass diff --git a/models/orm_models.py b/models/orm_models.py index 7f4b6fe..27a4bbd 100644 --- a/models/orm_models.py +++ b/models/orm_models.py @@ -1,11 +1,11 @@ """Modelli ORM SQLAlchemy per i dati dell'applicazione.""" -# mypy: ignore-errors - import logging +from typing import List + from sqlalchemy import Column, String, Text, Float, Integer, ForeignKey, Table, JSON -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from .database import Base logger = logging.getLogger(__name__) @@ -21,41 +21,41 @@ class QuestionORM(Base): __tablename__ = "questions" - id = Column(String(36), primary_key=True) - domanda = Column(Text, nullable=False) - risposta_attesa = Column(Text, nullable=False) - categoria = Column(Text, default="") + id: Mapped[str] = mapped_column(String(36), primary_key=True) + domanda: Mapped[str] = mapped_column(Text, nullable=False) + risposta_attesa: Mapped[str] = mapped_column(Text, nullable=False) + categoria: Mapped[str] = mapped_column(Text, default="") - sets = relationship( + sets: Mapped[List["QuestionSetORM"]] = relationship( "QuestionSetORM", secondary=question_set_questions, back_populates="questions" ) class QuestionSetORM(Base): __tablename__ = "question_sets" - id = Column(String(36), primary_key=True) - name = Column(Text, nullable=False) + id: Mapped[str] = mapped_column(String(36), primary_key=True) + name: Mapped[str] = mapped_column(Text, nullable=False) - questions = relationship( + questions: Mapped[List["QuestionORM"]] = relationship( "QuestionORM", secondary=question_set_questions, back_populates="sets" ) class TestResultORM(Base): __tablename__ = "test_results" - id = Column(String(36), primary_key=True) - set_id = Column(String(36)) - timestamp = Column(Text) - results = Column(JSON) + id: Mapped[str] = mapped_column(String(36), primary_key=True) + set_id: Mapped[str] = mapped_column(String(36)) + timestamp: Mapped[str] = mapped_column(Text) + results: Mapped[dict] = mapped_column(JSON) class APIPresetORM(Base): __tablename__ = "api_presets" - id = Column(String(36), primary_key=True) - name = Column(Text) - provider_name = Column(Text) - endpoint = Column(Text) - api_key = Column(Text) - model = Column(Text) - temperature = Column(Float) - max_tokens = Column(Integer) + id: Mapped[str] = mapped_column(String(36), primary_key=True) + name: Mapped[str] = mapped_column(Text) + provider_name: Mapped[str] = mapped_column(Text) + endpoint: Mapped[str] = mapped_column(Text) + api_key: Mapped[str] = mapped_column(Text) + model: Mapped[str] = mapped_column(Text) + temperature: Mapped[float] = mapped_column(Float) + max_tokens: Mapped[int] = mapped_column(Integer) 
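
The hunks above move models/database.py and models/orm_models.py to the SQLAlchemy 2.0 typed declarative style (a DeclarativeBase subclass plus Mapped[...] / mapped_column), which is also why the "# mypy: ignore-errors" directive can be dropped. For reference only, a minimal sketch of exercising these mapped classes against a throwaway in-memory SQLite engine, separate from the MySQL setup that DatabaseEngine reads from db.config; the import paths assume the module layout in this patch, and the sample question text is invented for illustration:

    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import Session

    from models.database import Base            # DeclarativeBase subclass from this patch (assumed layout)
    from models.orm_models import QuestionORM   # typed Mapped[...] columns from this patch (assumed layout)

    # Throwaway engine: the real app resolves MySQL credentials via DatabaseEngine;
    # here we only need the table metadata produced by the mapped classes.
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add(
            QuestionORM(
                id="q-1",
                domanda="Qual e' la capitale d'Italia?",   # sample data, not from the repo
                risposta_attesa="Roma",
                categoria="geografia",
            )
        )
        session.commit()
        loaded = session.execute(select(QuestionORM)).scalars().one()
        print(loaded.domanda, "->", loaded.risposta_attesa)

The same pattern (in-memory engine bound to Base.metadata) is what the tests/conftest.py fixture and the session_factory fixture later in this patch rely on.
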
diff --git a/models/question.py b/models/question.py index 2d099a4..54e33c2 100644 --- a/models/question.py +++ b/models/question.py @@ -1,10 +1,11 @@ import logging from dataclasses import dataclass -from typing import List, Optional, Tuple, Dict, Any +from typing import IO, List, Optional, Tuple, Dict, Any, cast import uuid import pandas as pd from sqlalchemy import select, delete +from sqlalchemy.orm import Mapper from models.database import DatabaseEngine from models.orm_models import QuestionORM, question_set_questions @@ -124,14 +125,15 @@ def _persist_entities(df: pd.DataFrame) -> Tuple[int, List[str]]: if added_count > 0: session.bulk_insert_mappings( - QuestionORM, new_rows.to_dict(orient="records") + cast(Mapper[Any], QuestionORM.__mapper__), + new_rows.to_dict(orient="records"), ) session.commit() return added_count, warnings @staticmethod - def import_from_file(file) -> Dict[str, Any]: + def import_from_file(file: IO[str] | IO[bytes]) -> Dict[str, Any]: """Importa domande da un file CSV o JSON. Parametri diff --git a/models/question_set.py b/models/question_set.py index c0619a9..79c5767 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, IO, List, Optional, Tuple import uuid import pandas as pd from sqlalchemy import select @@ -236,7 +236,7 @@ def _persist_entities( ) @staticmethod - def import_from_file(uploaded_file) -> "PersistSetsResult": + def import_from_file(uploaded_file: IO[str] | IO[bytes]) -> "PersistSetsResult": """Importa uno o più set di domande da un file JSON o CSV.""" if uploaded_file is None: diff --git a/models/test_result.py b/models/test_result.py index 50b512d..13fc5cd 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, asdict import uuid -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, cast from functools import lru_cache import pandas as pd @@ -19,7 +19,7 @@ class TestResult: id: str set_id: str timestamp: str - results: Dict + results: dict[str, Any] __test__ = False @staticmethod @@ -28,10 +28,10 @@ def load_all() -> List["TestResult"]: results = session.execute(select(TestResultORM)).scalars().all() return [ TestResult( - id=r.id, - set_id=r.set_id, - timestamp=r.timestamp, - results=r.results or {}, + id=cast(str, r.id), + set_id=cast(str, r.set_id), + timestamp=cast(str, r.timestamp), + results=cast(dict[str, Any], r.results or {}), ) for r in results ] @@ -118,15 +118,16 @@ def save(results: List["TestResult"]) -> None: for result in results: obj = session.get(TestResultORM, result.id) if obj: - obj.set_id = result.set_id - obj.timestamp = result.timestamp - obj.results = result.results + obj_cast = cast(Any, obj) + obj_cast.set_id = result.set_id + obj_cast.timestamp = result.timestamp + obj_cast.results = result.results else: session.add(TestResultORM(**asdict(result))) session.commit() @staticmethod - def add(set_id: str, results_data: Dict) -> str: + def add(set_id: str, results_data: dict[str, Any]) -> str: result_id = str(uuid.uuid4()) with DatabaseEngine.instance().get_session() as session: session.add( @@ -141,7 +142,7 @@ def add(set_id: str, results_data: Dict) -> str: return result_id @staticmethod - def add_and_refresh(set_id: str, results_data: Dict) -> str: + def add_and_refresh(set_id: str, results_data: dict[str, Any]) -> str: """Salva un singolo risultato e aggiorna il 
DataFrame in cache.""" rid = TestResult.add(set_id, results_data) TestResult.refresh_cache() diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..753bd35 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,25 @@ +import pathlib +import sys + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from models.database import DatabaseEngine, Base + + +@pytest.fixture() +def in_memory_db(): + # Reset singleton to ensure clean state + DatabaseEngine._instance = None # type: ignore[attr-defined] + db = DatabaseEngine.instance() + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + db._engine = engine # type: ignore[attr-defined] + db._session_factory = sessionmaker(bind=engine) # type: ignore[attr-defined] + yield db + # Reset after test + DatabaseEngine._instance = None # type: ignore[attr-defined] + diff --git a/tests/test_api_configurazione_view.py b/tests/test_api_configurazione_view.py new file mode 100644 index 0000000..508b172 --- /dev/null +++ b/tests/test_api_configurazione_view.py @@ -0,0 +1,148 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT +from views import api_configurazione as view + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.errors = [] + self.successes = [] + + def error(self, msg): + self.errors.append(msg) + + def success(self, msg): + self.successes.append(msg) + + +def test_start_new_preset_edit_initializes_session_state(monkeypatch): + dummy = DummySt() + monkeypatch.setattr(view, "st", dummy) + + view.start_new_preset_edit() + + assert dummy.session_state.editing_preset is True + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == { + "name": "", + "endpoint": DEFAULT_ENDPOINT, + "api_key": "", + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000, + } + + +def test_start_existing_preset_edit_initializes_session_state(monkeypatch, mocker): + dummy = DummySt() + dummy.session_state.api_presets = object() + monkeypatch.setattr(view, "st", dummy) + + preset = { + "name": "Existing", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": "0.2", + "max_tokens": "200", + } + mocker.patch("views.api_configurazione.get_preset_by_id", return_value=preset) + view.start_existing_preset_edit("123") + + assert dummy.session_state.editing_preset is True + assert dummy.session_state.current_preset_edit_id == "123" + assert dummy.session_state.preset_form_data == { + "name": "Existing", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.2, + "max_tokens": 200, + } + assert dummy.errors == [] + + +def test_save_preset_from_form_validation_error(monkeypatch, mocker): + dummy = DummySt() + dummy.session_state.preset_form_data = {} + dummy.session_state.current_preset_edit_id = None + monkeypatch.setattr(view, "st", dummy) + + mocker.patch("views.api_configurazione.validate_preset", return_value=(False, "err")) + mock_save = mocker.patch("views.api_configurazione.save_preset") + view.save_preset_from_form() + mock_save.assert_not_called() + + assert dummy.errors == ["err"] + + +def 
test_save_preset_from_form_success(monkeypatch, mocker): + dummy = DummySt() + dummy.session_state.update( + { + "preset_form_data": {}, + "current_preset_edit_id": "1", + "editing_preset": True, + "preset_name": "Name", + "preset_endpoint": "e", + "preset_api_key": "k", + "preset_model": "m", + "preset_temperature": 0.2, + "preset_max_tokens": 200, + } + ) + monkeypatch.setattr(view, "st", dummy) + + updated_df = pd.DataFrame([{"id": "1"}]) + mocker.patch("views.api_configurazione.validate_preset", return_value=(True, "")) + mocker.patch( + "views.api_configurazione.save_preset", + return_value=(True, "saved", updated_df), + ) + view.save_preset_from_form() + + assert dummy.session_state.api_presets is updated_df + assert dummy.successes == ["saved"] + assert dummy.session_state.editing_preset is False + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == {} + + +def test_delete_preset_callback_clears_form_state(monkeypatch, mocker): + dummy = DummySt() + dummy.session_state.update( + { + "api_presets": pd.DataFrame([{"id": "2"}]), + "editing_preset": True, + "current_preset_edit_id": "2", + "preset_form_data": {"name": "Old"}, + } + ) + monkeypatch.setattr(view, "st", dummy) + + updated_df = pd.DataFrame([]) + mocker.patch( + "views.api_configurazione.delete_preset", + return_value=(True, "deleted", updated_df), + ) + view.delete_preset_callback("2") + + assert dummy.session_state.api_presets is updated_df + assert dummy.successes == ["deleted"] + assert dummy.session_state.editing_preset is False + assert dummy.session_state.current_preset_edit_id is None + assert dummy.session_state.preset_form_data == {} diff --git a/tests/test_api_preset_controller.py b/tests/test_api_preset_controller.py index aa6557e..d2b9a24 100644 --- a/tests/test_api_preset_controller.py +++ b/tests/test_api_preset_controller.py @@ -1,7 +1,5 @@ import os import sys -from unittest.mock import Mock, patch - import pandas as pd sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -9,46 +7,53 @@ from controllers import api_preset_controller as controller # noqa: E402 -@patch("controllers.api_preset_controller.load_presets") -def test_validate_preset_empty_name(mock_load): +def test_validate_preset_empty_name(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") ok, msg = controller.validate_preset({"name": ""}) assert ok is False assert "non può essere vuoto" in msg mock_load.assert_not_called() -@patch("controllers.api_preset_controller.load_presets") -def test_validate_preset_duplicate(mock_load): +def test_validate_preset_duplicate(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) ok, msg = controller.validate_preset({"name": "A"}) assert ok is False assert "esiste già" in msg -@patch("controllers.api_preset_controller.load_presets") -def test_validate_preset_ok(mock_load): +def test_validate_preset_ok(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") mock_load.return_value = pd.DataFrame({"id": ["1"], "name": ["A"]}) ok, msg = controller.validate_preset({"name": "B"}) assert ok is True assert msg == "" -@patch("controllers.api_preset_controller.refresh_api_presets") -@patch("controllers.api_preset_controller.APIPreset.save") -@patch("controllers.api_preset_controller.load_presets") -@patch("controllers.api_preset_controller.uuid.uuid4", return_value="new-id") -def 
test_save_preset_new(mock_uuid, mock_load, mock_save, mock_refresh): - df = pd.DataFrame([ - { - "id": "1", - "name": "Old", - "endpoint": "e", - "api_key": "k", - "model": "m", - "temperature": 0.0, - "max_tokens": 100, - } - ]) +def test_save_preset_new(mocker): + mock_uuid = mocker.patch( + "controllers.api_preset_controller.uuid.uuid4", return_value="new-id" + ) + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_save = mocker.patch("controllers.api_preset_controller.APIPreset.save") + mock_refresh = mocker.patch( + "controllers.api_preset_controller.refresh_api_presets" + ) + + df = pd.DataFrame( + [ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ] + ) mock_load.return_value = df updated_df = pd.DataFrame([]) mock_refresh.return_value = updated_df @@ -73,21 +78,26 @@ def test_save_preset_new(mock_uuid, mock_load, mock_save, mock_refresh): assert any(p.name == "New" for p in saved_presets) -@patch("controllers.api_preset_controller.refresh_api_presets") -@patch("controllers.api_preset_controller.APIPreset.delete") -@patch("controllers.api_preset_controller.load_presets") -def test_delete_preset(mock_load, mock_delete, mock_refresh): - df = pd.DataFrame([ - { - "id": "1", - "name": "Old", - "endpoint": "e", - "api_key": "k", - "model": "m", - "temperature": 0.0, - "max_tokens": 100, - } - ]) +def test_delete_preset(mocker): + mock_load = mocker.patch("controllers.api_preset_controller.load_presets") + mock_delete = mocker.patch("controllers.api_preset_controller.APIPreset.delete") + mock_refresh = mocker.patch( + "controllers.api_preset_controller.refresh_api_presets" + ) + + df = pd.DataFrame( + [ + { + "id": "1", + "name": "Old", + "endpoint": "e", + "api_key": "k", + "model": "m", + "temperature": 0.0, + "max_tokens": 100, + } + ] + ) mock_load.return_value = df updated_df = pd.DataFrame([]) mock_refresh.return_value = updated_df @@ -99,14 +109,14 @@ def test_delete_preset(mock_load, mock_delete, mock_refresh): mock_delete.assert_called_once_with("1") -@patch("utils.openai_client.get_openai_client") -def test_test_api_connection_delegates(mock_get_client): - mock_client = Mock() +def test_test_api_connection_delegates(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client - mock_choice = Mock() - mock_choice.message = Mock() + mock_choice = mocker.Mock() + mock_choice.message = mocker.Mock() mock_choice.message.content = "Connessione riuscita." 
- mock_resp = Mock() + mock_resp = mocker.Mock() mock_resp.choices = [mock_choice] mock_client.chat.completions.create.return_value = mock_resp @@ -115,3 +125,4 @@ def test_test_api_connection_delegates(mock_get_client): assert ok is True assert "riuscita" in msg.lower() mock_get_client.assert_called_once_with(api_key="k", base_url="e") + diff --git a/tests/test_api_preset_model.py b/tests/test_api_preset_model.py new file mode 100644 index 0000000..8874497 --- /dev/null +++ b/tests/test_api_preset_model.py @@ -0,0 +1,133 @@ +import os +import sys +from types import SimpleNamespace + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import StaticPool + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import models.api_preset as api_preset_module +from models.api_preset import APIPreset +from models.orm_models import APIPresetORM +from models.database import Base + + +@pytest.fixture +def session_factory(monkeypatch): + engine = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(engine) + SessionLocal = sessionmaker(bind=engine) + + dummy_engine = SimpleNamespace(get_session=lambda: SessionLocal()) + monkeypatch.setattr(api_preset_module.DatabaseEngine, "instance", lambda: dummy_engine) + return SessionLocal + + +def test_load_all_returns_correct_attributes(session_factory): + session = session_factory() + session.add( + APIPresetORM( + id="1", + name="Preset", + provider_name="OpenAI", + endpoint="http://api", + api_key="key", + model="gpt", + temperature=0.7, + max_tokens=1000, + ) + ) + session.commit() + session.close() + + presets = APIPreset.load_all() + assert len(presets) == 1 + preset = presets[0] + assert preset.id == "1" + assert preset.name == "Preset" + assert preset.provider_name == "OpenAI" + assert preset.endpoint == "http://api" + assert preset.api_key == "key" + assert preset.model == "gpt" + assert preset.temperature == 0.7 + assert preset.max_tokens == 1000 + + +def test_save_inserts_and_updates(session_factory): + APIPreset.save([ + APIPreset( + id="1", + name="Initial", + provider_name="P", + endpoint="E", + api_key="K", + model="M", + temperature=0.1, + max_tokens=50, + ) + ]) + + session = session_factory() + row = session.get(APIPresetORM, "1") + assert row.name == "Initial" + assert row.max_tokens == 50 + session.close() + + APIPreset.save([ + APIPreset( + id="1", + name="Updated", + provider_name="P2", + endpoint="E2", + api_key="K2", + model="M2", + temperature=0.2, + max_tokens=150, + ) + ]) + + session = session_factory() + row = session.get(APIPresetORM, "1") + assert row.name == "Updated" + assert row.provider_name == "P2" + assert row.endpoint == "E2" + assert row.api_key == "K2" + assert row.model == "M2" + assert row.temperature == 0.2 + assert row.max_tokens == 150 + session.close() + + +def test_delete_existing_and_non_existing(session_factory): + session = session_factory() + session.add( + APIPresetORM( + id="1", + name="Preset", + provider_name="P", + endpoint="E", + api_key="K", + model="M", + temperature=0.1, + max_tokens=10, + ) + ) + session.commit() + session.close() + + APIPreset.delete("1") + session = session_factory() + assert session.get(APIPresetORM, "1") is None + session.close() + + APIPreset.delete("nonexistent") + session = session_factory() + assert session.query(APIPresetORM).count() == 0 + session.close() diff --git a/tests/test_app.py b/tests/test_app.py new file 
mode 100644 index 0000000..bc3da92 --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,68 @@ +import importlib +import sys +import types +from pathlib import Path + + +def test_app_page_config_and_navigation(monkeypatch): + """Smoke test for Streamlit app configuration and navigation setup.""" + # Record calls to Streamlit API + page_config = {} + radio_call = {} + + def fake_set_page_config(**kwargs): + page_config.update(kwargs) + + def fake_radio(label, options): + radio_call["label"] = label + radio_call["options"] = options + return options[0] + + fake_sidebar = types.SimpleNamespace(radio=fake_radio) + fake_st = types.SimpleNamespace( + set_page_config=fake_set_page_config, + sidebar=fake_sidebar, + title=lambda *a, **k: None, + ) + + monkeypatch.setitem(sys.modules, "streamlit", fake_st) + + # Ensure repository root is importable + project_root = Path(__file__).resolve().parent.parent + sys.path.insert(0, str(project_root)) + + # Stub view modules required by app.py + views_pkg = types.ModuleType("views") + views_pkg.__path__ = [] # mark as package + view_names = [ + "api_configurazione", + "esecuzione_test", + "gestione_domande", + "gestione_set", + "home", + "visualizza_risultati", + ] + for name in view_names: + mod = types.ModuleType(f"views.{name}") + mod.render = lambda: None + sys.modules[f"views.{name}"] = mod + setattr(views_pkg, name, mod) + + session_state_mod = types.ModuleType("views.session_state") + session_state_mod.initialize_session_state = lambda: None + sys.modules["views.session_state"] = session_state_mod + + style_utils_mod = types.ModuleType("views.style_utils") + style_utils_mod.add_global_styles = lambda: None + sys.modules["views.style_utils"] = style_utils_mod + + sys.modules["views"] = views_pkg + + # Ensure a fresh import of app + monkeypatch.delitem(sys.modules, "app", raising=False) + app = importlib.import_module("app") + + assert page_config["page_title"] == "LLM Test Evaluation Platform" + assert radio_call["label"] == "Navigazione" + assert radio_call["options"] == list(app.PAGES.keys()) + diff --git a/tests/test_cache_utils.py b/tests/test_cache_utils.py new file mode 100644 index 0000000..fc6758d --- /dev/null +++ b/tests/test_cache_utils.py @@ -0,0 +1,170 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from utils.cache import ( # noqa: E402 + get_questions, + refresh_questions, + get_question_sets, + refresh_question_sets, + get_api_presets, + refresh_api_presets, + get_results, + refresh_results, +) +from models.question import Question # noqa: E402 +from models.question_set import QuestionSet # noqa: E402 +from models.api_preset import APIPreset # noqa: E402 +from models.test_result import TestResult # noqa: E402 + + +def test_get_questions_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + Question(id="1", domanda="Q1", risposta_attesa="A1", categoria="C1") + ] + + monkeypatch.setattr(Question, "load_all", staticmethod(fake_load_all)) + get_questions.cache_clear() + + df1 = get_questions() + assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_questions() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + Question(id="2", domanda="Q2", risposta_attesa="A2", categoria="C2") + ] + + monkeypatch.setattr(Question, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_questions() + assert call_count["count"] == 2 + 
assert list(df3["id"]) == ["2"] + + df4 = get_questions() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_question_sets_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + QuestionSet(id="1", name="S1", questions=["q1"]) + ] + + monkeypatch.setattr(QuestionSet, "load_all", staticmethod(fake_load_all)) + get_question_sets.cache_clear() + + df1 = get_question_sets() + assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_question_sets() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + QuestionSet(id="2", name="S2", questions=["q2"]) + ] + + monkeypatch.setattr(QuestionSet, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_question_sets() + assert call_count["count"] == 2 + assert list(df3["id"]) == ["2"] + + df4 = get_question_sets() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_api_presets_cache(monkeypatch): + call_count = {"count": 0} + + def fake_load_all(): + call_count["count"] += 1 + return [ + APIPreset( + id="1", + name="P1", + provider_name="prov", + endpoint="e1", + api_key="k1", + model="m1", + temperature=0.5, + max_tokens=10, + ) + ] + + monkeypatch.setattr(APIPreset, "load_all", staticmethod(fake_load_all)) + get_api_presets.cache_clear() + + df1 = get_api_presets() + assert call_count["count"] == 1 + assert list(df1["id"]) == ["1"] + + df2 = get_api_presets() + assert call_count["count"] == 1 + assert df2.equals(df1) + + def fake_load_all_new(): + call_count["count"] += 1 + return [ + APIPreset( + id="2", + name="P2", + provider_name="prov2", + endpoint="e2", + api_key="k2", + model="m2", + temperature=0.7, + max_tokens=20, + ) + ] + + monkeypatch.setattr(APIPreset, "load_all", staticmethod(fake_load_all_new)) + df3 = refresh_api_presets() + assert call_count["count"] == 2 + assert list(df3["id"]) == ["2"] + + df4 = get_api_presets() + assert call_count["count"] == 2 + assert df4.equals(df3) + + +def test_get_and_refresh_results(monkeypatch): + load_called = {"count": 0} + refresh_called = {"count": 0} + df1 = pd.DataFrame([{"id": "1", "set_id": "s1", "timestamp": "t1", "results": {}}]) + df2 = pd.DataFrame([{"id": "2", "set_id": "s2", "timestamp": "t2", "results": {}}]) + + def fake_load_all_df(): + load_called["count"] += 1 + return df1 + + def fake_refresh_cache(): + refresh_called["count"] += 1 + return df2 + + monkeypatch.setattr(TestResult, "load_all_df", staticmethod(fake_load_all_df)) + monkeypatch.setattr(TestResult, "refresh_cache", staticmethod(fake_refresh_cache)) + + assert get_results().equals(df1) + assert load_called["count"] == 1 + + assert refresh_results().equals(df2) + assert refresh_called["count"] == 1 diff --git a/tests/test_component_utils_view.py b/tests/test_component_utils_view.py new file mode 100644 index 0000000..394da4b --- /dev/null +++ b/tests/test_component_utils_view.py @@ -0,0 +1,39 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import component_utils + + +class DummySt: + def __init__(self): + self.calls = [] + + def markdown(self, text, **kwargs): + self.calls.append(text) + + +def test_create_card_renders_expected_html(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(component_utils, 'st', dummy_st) + + component_utils.create_card('Titolo', 'Contenuto', icon='⭐', is_success=True) + + assert any('Titolo' in c and 'Contenuto' in c and '⭐' in c 
for c in dummy_st.calls) + # success card should have specific background color + assert any('#f8fff9' in c for c in dummy_st.calls) + + +def test_create_metrics_container_renders_metrics(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(component_utils, 'st', dummy_st) + + metrics = [{'label': 'Accuracy', 'value': 95, 'unit': '%', 'icon': '📈'}] + component_utils.create_metrics_container(metrics) + + # first call is CSS, second call is metrics HTML + assert len(dummy_st.calls) >= 2 + metrics_html = dummy_st.calls[-1] + assert 'Accuracy' in metrics_html + assert '95' in metrics_html + assert '📈' in metrics_html diff --git a/tests/test_esecuzione_test_view.py b/tests/test_esecuzione_test_view.py new file mode 100644 index 0000000..f79d428 --- /dev/null +++ b/tests/test_esecuzione_test_view.py @@ -0,0 +1,38 @@ +import pytest + +from views import esecuzione_test + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + + +def test_set_llm_mode_callback(monkeypatch): + dummy_st = DummySt() + dummy_st.session_state.test_mode = "Manual" + dummy_st.session_state.mode_changed = False + monkeypatch.setattr(esecuzione_test, "st", dummy_st) + + esecuzione_test.set_llm_mode_callback() + + assert dummy_st.session_state.test_mode == "Valutazione Automatica con LLM" + assert dummy_st.session_state.mode_changed is True + + +def test_run_llm_test_callback(monkeypatch): + dummy_st = DummySt() + dummy_st.session_state.run_llm_test = False + monkeypatch.setattr(esecuzione_test, "st", dummy_st) + + esecuzione_test.run_llm_test_callback() + + assert dummy_st.session_state.run_llm_test is True diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py index 35481f6..782745a 100644 --- a/tests/test_evaluate_answer.py +++ b/tests/test_evaluate_answer.py @@ -2,7 +2,6 @@ import logging import os import sys -from unittest.mock import Mock, patch import pytest @@ -11,24 +10,24 @@ from controllers.test_controller import evaluate_answer # noqa: E402 -def _mock_response(content: str): - mock_resp = Mock() - mock_choice = Mock() - mock_choice.message = Mock() +def _mock_response(mocker, content: str): + mock_resp = mocker.Mock() + mock_choice = mocker.Mock() + mock_choice.message = mocker.Mock() mock_choice.message.content = content mock_resp.choices = [mock_choice] return mock_resp -def _mock_response_no_choices(): - mock_resp = Mock() +def _mock_response_no_choices(mocker): + mock_resp = mocker.Mock() mock_resp.choices = [] return mock_resp -@patch("utils.openai_client.get_openai_client") -def test_evaluate_answer_success(mock_get_client): - mock_client = Mock() +def test_evaluate_answer_success(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client evaluation = { @@ -39,7 +38,7 @@ def test_evaluate_answer_success(mock_get_client): "completeness": 90, } mock_client.chat.completions.create.return_value = _mock_response( - json.dumps(evaluation) + mocker, json.dumps(evaluation) ) result = evaluate_answer( @@ -50,19 +49,21 @@ def test_evaluate_answer_success(mock_get_client): assert result["similarity"] == 90 -@patch("utils.openai_client.get_openai_client", return_value=None) -def test_evaluate_answer_no_client(mock_get_client): +def test_evaluate_answer_no_client(mocker): + 
mocker.patch("utils.openai_client.get_openai_client", return_value=None) with pytest.raises(ValueError): evaluate_answer( "q", "expected", "actual", {"api_key": None} ) -@patch("utils.openai_client.get_openai_client") -def test_evaluate_answer_json_decode_error(mock_get_client): - mock_client = Mock() +def test_evaluate_answer_json_decode_error(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response("not json") + mock_client.chat.completions.create.return_value = _mock_response( + mocker, "not json" + ) with pytest.raises(ValueError): evaluate_answer( @@ -70,11 +71,11 @@ def test_evaluate_answer_json_decode_error(mock_get_client): ) -@patch("utils.openai_client.get_openai_client") -def test_evaluate_answer_no_choices(mock_get_client, caplog): - mock_client = Mock() +def test_evaluate_answer_no_choices(mocker, caplog): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response_no_choices() + mock_client.chat.completions.create.return_value = _mock_response_no_choices(mocker) with caplog.at_level(logging.ERROR): with pytest.raises(RuntimeError): diff --git a/tests/test_file_reader_utils.py b/tests/test_file_reader_utils.py new file mode 100644 index 0000000..c4784a8 --- /dev/null +++ b/tests/test_file_reader_utils.py @@ -0,0 +1,90 @@ +import json +import pandas as pd +import pytest + +from utils.file_reader_utils import ( + read_questions, + read_question_sets, + read_test_results, + filter_new_rows, +) + + +def test_read_questions_csv(tmp_path): + file = tmp_path / "questions.csv" + file.write_text("domanda,risposta_attesa\nq1,a1\n") + with file.open("r") as f: + df = read_questions(f) + assert list(df.columns) == ["id", "domanda", "risposta_attesa", "categoria"] + assert df.iloc[0]["domanda"] == "q1" + + +def test_read_questions_json(tmp_path): + content = [{"domanda": "q1", "risposta_attesa": "a1"}] + file = tmp_path / "questions.json" + file.write_text(json.dumps(content)) + with file.open("r") as f: + df = read_questions(f) + assert df.iloc[0]["risposta_attesa"] == "a1" + + +def test_read_questions_missing_column(tmp_path): + file = tmp_path / "bad_questions.csv" + file.write_text("domanda\nq1\n") + with file.open("r") as f: + with pytest.raises(ValueError): + read_questions(f) + + +def test_read_question_sets_csv(tmp_path): + file = tmp_path / "sets.csv" + file.write_text( + "name,id,domanda,risposta_attesa,categoria\ns1,1,q1,a1,c1\n" + ) + with file.open("r") as f: + sets = read_question_sets(f) + assert sets == [ + { + "name": "s1", + "questions": [ + { + "id": "1", + "domanda": "q1", + "risposta_attesa": "a1", + "categoria": "c1", + } + ], + } + ] + + +def test_read_question_sets_missing_columns(tmp_path): + file = tmp_path / "bad_sets.csv" + file.write_text("name,id,domanda\ns1,1,q1\n") + with file.open("r") as f: + with pytest.raises(ValueError): + read_question_sets(f) + + +def test_read_test_results_csv(tmp_path): + file = tmp_path / "results.csv" + file.write_text("id,set_id,timestamp,results\n1,s1,2024-01-01,{}\n") + with file.open("r") as f: + df = read_test_results(f) + assert df.iloc[0]["set_id"] == "s1" + assert df.iloc[0]["results"] == {} + + +def test_read_test_results_invalid_json(tmp_path): + file = tmp_path / "bad_results.json" + 
file.write_text("{invalid json") + with file.open("r") as f: + with pytest.raises(ValueError): + read_test_results(f) + + +def test_filter_new_rows_duplicates(): + df = pd.DataFrame({"id": ["a", "b", "b", "c"]}) + filtered, count = filter_new_rows(df, ["b", "d"]) + assert list(filtered["id"]) == ["a", "c"] + assert count == 2 diff --git a/tests/test_gestione_domande_view.py b/tests/test_gestione_domande_view.py new file mode 100644 index 0000000..72c7a4e --- /dev/null +++ b/tests/test_gestione_domande_view.py @@ -0,0 +1,183 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import gestione_domande +from views.state_models import QuestionPageState + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummyContext: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.button_returns: list[bool] = [] + self.rerun_called = False + + def write(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def success(self, *args, **kwargs): + pass + + def button(self, *args, **kwargs): + if self.button_returns: + return self.button_returns.pop(0) + return False + + def columns(self, n): + return (DummyContext(), DummyContext()) + + def rerun(self): + self.rerun_called = True + + +def _setup(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(gestione_domande, "st", dummy_st) + return dummy_st + + +def test_create_save_question_callback_success(monkeypatch): + dummy_st = _setup(monkeypatch) + questions_df = pd.DataFrame({"id": [1]}) + + def fake_save_question_action(*_args): + return {"success": True, "questions_df": questions_df} + + monkeypatch.setattr( + gestione_domande, "save_question_action", fake_save_question_action + ) + + cb = gestione_domande.create_save_question_callback("1", "q", "a", "cat") + cb() + + state: QuestionPageState = dummy_st.session_state.question_page_state + assert state.save_success is True + assert state.save_success_message == "Domanda salvata." + assert state.trigger_rerun is True + assert isinstance(dummy_st.session_state.questions, pd.DataFrame) + + +def test_create_save_question_callback_failure(monkeypatch): + dummy_st = _setup(monkeypatch) + + def fake_save_question_action(*_args): + return {"success": False} + + monkeypatch.setattr( + gestione_domande, "save_question_action", fake_save_question_action + ) + + cb = gestione_domande.create_save_question_callback("1", "q", "a", "cat") + cb() + + state: QuestionPageState = dummy_st.session_state.question_page_state + assert state.save_error is True + assert state.save_error_message == "Domanda non salvata." 
+    assert state.trigger_rerun is False
+
+
+def test_import_questions_callback_success(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.uploaded_file_content = object()
+    questions_df = pd.DataFrame({"id": [1]})
+
+    def fake_import_questions_action(_file):
+        return {"questions_df": questions_df, "imported_count": 2, "warnings": []}
+
+    monkeypatch.setattr(
+        gestione_domande, "import_questions_action", fake_import_questions_action
+    )
+
+    gestione_domande.import_questions_callback()
+
+    state: QuestionPageState = dummy_st.session_state.question_page_state
+    assert state.import_success is True
+    assert "2" in state.import_success_message
+    assert dummy_st.session_state.upload_questions_file is None
+    assert dummy_st.session_state.uploaded_file_content is None
+    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)
+
+
+def test_import_questions_callback_error(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.uploaded_file_content = object()
+
+    def fake_import_questions_action(_file):
+        raise Exception("bad")
+
+    monkeypatch.setattr(
+        gestione_domande, "import_questions_action", fake_import_questions_action
+    )
+
+    gestione_domande.import_questions_callback()
+
+    state: QuestionPageState = dummy_st.session_state.question_page_state
+    assert state.import_error is True
+    assert state.import_error_message == "bad"
+    assert dummy_st.session_state.upload_questions_file is None
+    assert dummy_st.session_state.uploaded_file_content is None
+
+
+def test_confirm_delete_question_dialog_success(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.button_returns = [True, False]
+    questions_df = pd.DataFrame({"id": [1]})
+
+    def fake_delete_question_action(_id):
+        return questions_df
+
+    monkeypatch.setattr(
+        gestione_domande, "delete_question_action", fake_delete_question_action
+    )
+
+    gestione_domande.confirm_delete_question_dialog.__wrapped__(1, "q1")
+
+    state: QuestionPageState = dummy_st.session_state.question_page_state
+    assert state.delete_success is True
+    assert state.trigger_rerun is True
+    assert dummy_st.rerun_called is True
+    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)
+
+
+def test_confirm_delete_question_dialog_error(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.button_returns = [True, False]
+
+    def fake_delete_question_action(_id):
+        raise Exception("fail")
+
+    monkeypatch.setattr(
+        gestione_domande, "delete_question_action", fake_delete_question_action
+    )
+
+    gestione_domande.confirm_delete_question_dialog.__wrapped__(1, "q1")
+
+    state: QuestionPageState = dummy_st.session_state.question_page_state
+    assert state.save_error is True
+    assert state.save_error_message == "fail"
+    assert dummy_st.rerun_called is True
+
diff --git a/tests/test_gestione_set_view.py b/tests/test_gestione_set_view.py
new file mode 100644
index 0000000..e37f63b
--- /dev/null
+++ b/tests/test_gestione_set_view.py
@@ -0,0 +1,187 @@
+import os
+import sys
+import pandas as pd
+
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from views import set_helpers
+from views.state_models import SetPageState
+from models.question_set import PersistSetsResult
+
+
+class DummySessionState(dict):
+    def __getattr__(self, name):
+        return self.get(name)
+
+    def __setattr__(self, name, value):
+        self[name] = value
+
+
+class DummyContext:
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+
+class DummySt:
+    def __init__(self):
+        self.session_state = DummySessionState()
+        self.button_returns: list[bool] = []
+        self.rerun_called = False
+
+    def write(self, *args, **kwargs):
+        pass
+
+    def button(self, *args, **kwargs):
+        if self.button_returns:
+            return self.button_returns.pop(0)
+        return False
+
+    def columns(self, n):
+        return (DummyContext(), DummyContext())
+
+    def warning(self, *args, **kwargs):
+        pass
+
+    def rerun(self):
+        self.rerun_called = True
+
+
+def _setup(monkeypatch):
+    dummy_st = DummySt()
+    monkeypatch.setattr(set_helpers, "st", dummy_st)
+    return dummy_st
+
+
+def test_create_save_set_callback_success(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.set_expanders = {}
+    dummy_st.session_state.question_checkboxes = {"1": {"2": True}}
+    dummy_st.session_state.newly_selected_questions = {"1": ["3"]}
+    dummy_st.session_state.set_name_1 = "Name"
+    state = SetPageState()
+
+    captured = {}
+
+    def fake_save_set_callback(set_id, name, options, new_ids, st_state):
+        captured["args"] = (set_id, name, options, new_ids)
+        st_state.save_set_success = True
+
+    monkeypatch.setattr(set_helpers, "save_set_callback", fake_save_set_callback)
+
+    cb = set_helpers.create_save_set_callback("1", "exp1", state)
+    cb()
+
+    assert dummy_st.session_state.set_expanders["exp1"] is True
+    assert captured["args"] == (
+        "1",
+        "Name",
+        {"2": True},
+        ["3"],
+    )
+    assert state.save_set_success is True
+
+
+def test_create_save_set_callback_error(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.set_expanders = {}
+    dummy_st.session_state.question_checkboxes = {"1": {}}
+    dummy_st.session_state.newly_selected_questions = {"1": []}
+    dummy_st.session_state.set_name_1 = "Name"
+    state = SetPageState()
+
+    def fake_save_set_callback(set_id, name, options, new_ids, st_state):
+        st_state.save_set_error = True
+        st_state.save_set_error_message = "boom"
+
+    monkeypatch.setattr(set_helpers, "save_set_callback", fake_save_set_callback)
+
+    cb = set_helpers.create_save_set_callback("1", "exp1", state)
+    cb()
+
+    assert dummy_st.session_state.set_expanders["exp1"] is True
+    assert state.save_set_error is True
+    assert state.save_set_error_message == "boom"
+
+
+def test_import_set_callback_success(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.uploaded_file_content_set = object()
+
+    result = PersistSetsResult(
+        sets_df=pd.DataFrame({"id": [1]}),
+        questions_df=pd.DataFrame({"id": [2]}),
+        sets_imported_count=1,
+        new_questions_added_count=0,
+        existing_questions_found_count=0,
+        warnings=["warn"],
+    )
+
+    monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _f: result)
+
+    state = SetPageState()
+    set_helpers.import_set_callback(state)
+
+    assert state.import_set_success is True
+    assert state.import_set_success_message == "1 set importati."
+    assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame)
+    assert isinstance(dummy_st.session_state.questions, pd.DataFrame)
+    assert dummy_st.session_state.uploaded_file_content_set is None
+
+
+def test_import_set_callback_error(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.session_state.uploaded_file_content_set = object()
+    dummy_st.session_state.upload_set_file = object()
+
+    def fake_import_from_file(_f):
+        raise Exception("fail")
+
+    monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", fake_import_from_file)
+
+    state = SetPageState()
+    set_helpers.import_set_callback(state)
+
+    assert state.import_set_error is True
+    assert state.import_set_error_message == "fail"
+    assert dummy_st.session_state.uploaded_file_content_set is None
+    assert "upload_set_file" not in dummy_st.session_state
+
+
+def test_confirm_delete_set_dialog_success(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.button_returns = [True, False]
+    state = SetPageState()
+
+    def fake_delete_set_callback(set_id, st_state):
+        st_state.delete_set_success = True
+        dummy_st.session_state.question_sets = pd.DataFrame({"id": [1]})
+
+    monkeypatch.setattr(set_helpers, "delete_set_callback", fake_delete_set_callback)
+
+    set_helpers.confirm_delete_set_dialog.__wrapped__("1", "name", state)
+
+    assert state.delete_set_success is True
+    assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame)
+    assert dummy_st.rerun_called is True
+
+
+def test_confirm_delete_set_dialog_error(monkeypatch):
+    dummy_st = _setup(monkeypatch)
+    dummy_st.button_returns = [True, False]
+    state = SetPageState()
+
+    def fake_delete_set_callback(set_id, st_state):
+        st_state.save_set_error = True
+        st_state.save_set_error_message = "bad"
+
+    monkeypatch.setattr(set_helpers, "delete_set_callback", fake_delete_set_callback)
+
+    set_helpers.confirm_delete_set_dialog.__wrapped__("1", "name", state)
+
+    assert state.save_set_error is True
+    assert state.save_set_error_message == "bad"
+    assert dummy_st.rerun_called is True
+
diff --git a/tests/test_home_view.py b/tests/test_home_view.py
new file mode 100644
index 0000000..80a8d6a
--- /dev/null
+++ b/tests/test_home_view.py
@@ -0,0 +1,47 @@
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from views import home
+
+
+class DummyColumn:
+    def __init__(self, parent):
+        self.parent = parent
+
+    def markdown(self, text, **kwargs):
+        self.parent.markdown(text, **kwargs)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+
+class DummySt:
+    def __init__(self):
+        self.markdown_calls = []
+
+    def markdown(self, text, **kwargs):
+        self.markdown_calls.append(text)
+
+    def columns(self, n):
+        return (DummyColumn(self), DummyColumn(self))
+
+
+def test_home_render_injects_styles_and_content(monkeypatch):
+    dummy_st = DummySt()
+    monkeypatch.setattr(home, 'st', dummy_st)
+
+    called = {'home_styles': False}
+
+    def fake_add_home_styles():
+        called['home_styles'] = True
+
+    monkeypatch.setattr(home, 'add_home_styles', fake_add_home_styles)
+
+    home.render()
+
+    assert called['home_styles'] is True
+    assert any('Piattaforma di Valutazione LLM' in m for m in dummy_st.markdown_calls)
diff --git a/tests/test_import_results.py b/tests/test_import_results.py
index be58f93..19a26cd 100644
--- a/tests/test_import_results.py
+++ b/tests/test_import_results.py
@@ -1,6 +1,5 @@
 import os
 import sys
-from unittest.mock import patch
 
 import pandas as pd
 import pytest
@@ -14,15 +13,10 @@
@pytest.mark.parametrize("filename", ["test_results.csv", "test_results.json"]) -@patch("models.test_result.TestResult.refresh_cache") -@patch("models.test_result.TestResult.save") -@patch("models.test_result.TestResult.load_all_df") -def test_import_from_file_skips_duplicates_and_saves( - mock_load, - mock_save, - mock_refresh, - filename, -): +def test_import_from_file_skips_duplicates_and_saves(mocker, filename): + mock_load = mocker.patch("models.test_result.TestResult.load_all_df") + mock_save = mocker.patch("models.test_result.TestResult.save") + mock_refresh = mocker.patch("models.test_result.TestResult.refresh_cache") mock_load.return_value = pd.DataFrame( [{"id": "1", "set_id": "s1", "timestamp": "t0", "results": {}}] ) diff --git a/tests/test_initialize_db.py b/tests/test_initialize_db.py new file mode 100644 index 0000000..cd26608 --- /dev/null +++ b/tests/test_initialize_db.py @@ -0,0 +1,32 @@ +import logging +import runpy + + +def test_initialize_db_logs_success(mocker, caplog): + mock_engine = mocker.MagicMock() + mocker.patch("models.database.DatabaseEngine.instance", return_value=mock_engine) + mocker.patch("utils.startup_utils.setup_logging") + caplog.set_level(logging.INFO) + + runpy.run_module("initialize_db", run_name="__main__") + + mock_engine.init_db.assert_called_once_with() + assert any( + "Database inizializzato con successo" in record.message for record in caplog.records + ) + + +def test_initialize_db_logs_error(mocker, caplog): + mock_engine = mocker.MagicMock() + mock_engine.init_db.side_effect = Exception("boom") + mocker.patch("models.database.DatabaseEngine.instance", return_value=mock_engine) + mocker.patch("utils.startup_utils.setup_logging") + caplog.set_level(logging.ERROR) + + runpy.run_module("initialize_db", run_name="__main__") + + mock_engine.init_db.assert_called_once_with() + assert any( + "Errore durante l'inizializzazione del database" in record.message + for record in caplog.records + ) diff --git a/tests/test_models_cached_data.py b/tests/test_models_cached_data.py new file mode 100644 index 0000000..ebe304d --- /dev/null +++ b/tests/test_models_cached_data.py @@ -0,0 +1,49 @@ +import models.cached_data as cached_data + + +def test_get_questions(monkeypatch): + called = {} + + def fake_load_all(): + called['done'] = True + return [1] + + monkeypatch.setattr(cached_data.Question, 'load_all', staticmethod(fake_load_all)) + assert cached_data.get_questions() == [1] + assert called + + +def test_get_question_sets(monkeypatch): + called = {} + + def fake_load_all(): + called['done'] = True + return ['set'] + + monkeypatch.setattr(cached_data.QuestionSet, 'load_all', staticmethod(fake_load_all)) + assert cached_data.get_question_sets() == ['set'] + assert called + + +def test_get_api_presets(monkeypatch): + called = {} + + def fake_load_all(): + called['done'] = True + return ['preset'] + + monkeypatch.setattr(cached_data.APIPreset, 'load_all', staticmethod(fake_load_all)) + assert cached_data.get_api_presets() == ['preset'] + assert called + + +def test_get_results(monkeypatch): + called = {} + + def fake_load_all(): + called['done'] = True + return ['result'] + + monkeypatch.setattr(cached_data.TestResult, 'load_all', staticmethod(fake_load_all)) + assert cached_data.get_results() == ['result'] + assert called diff --git a/tests/test_models_database.py b/tests/test_models_database.py new file mode 100644 index 0000000..f80606d --- /dev/null +++ b/tests/test_models_database.py @@ -0,0 +1,49 @@ +import pytest +from types import SimpleNamespace + 
+from models import database +from models.database import DatabaseEngine + + +def test_get_engine_uses_config_and_create_engine(monkeypatch): + DatabaseEngine._instance = None # ensure fresh singleton + db = DatabaseEngine.instance() + db._engine = None # type: ignore[attr-defined] + fake_cfg = {'user': 'u', 'password': 'p', 'host': 'h', 'database': 'db'} + monkeypatch.setattr(DatabaseEngine, '_load_config', lambda self: fake_cfg) + called = {} + + def fake_ensure(self, cfg): + called['ensure'] = cfg + monkeypatch.setattr(DatabaseEngine, '_ensure_database', fake_ensure) + fake_engine = SimpleNamespace() + + def fake_create_engine(url, pool_pre_ping=True, pool_recycle=3600): + called['url'] = url + return fake_engine + monkeypatch.setattr(database, 'create_engine', fake_create_engine) + + engine = db.get_engine() + assert engine is fake_engine + assert called['ensure'] == fake_cfg + assert 'mysql+pymysql://u:p@h:3306/db' in called['url'] + # second call should reuse same engine + assert db.get_engine() is fake_engine + + +def test_ensure_database_error(monkeypatch): + DatabaseEngine._instance = None + db = DatabaseEngine.instance() + + class DummyEngine: + def begin(self): + raise Exception('boom') + + monkeypatch.setattr(database, 'create_engine', lambda *a, **k: DummyEngine()) + with pytest.raises(RuntimeError): + db._ensure_database({ + 'user': 'u', + 'password': 'p', + 'host': 'h', + 'database': 'd', + }) diff --git a/tests/test_models_orm.py b/tests/test_models_orm.py new file mode 100644 index 0000000..3aa12d1 --- /dev/null +++ b/tests/test_models_orm.py @@ -0,0 +1,92 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +from models.orm_models import ( + Base, + QuestionORM, + QuestionSetORM, + TestResultORM as ResultORM, + APIPresetORM, + question_set_questions, +) + + +def test_orm_tables_and_relationships(): + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + + with Session(engine) as session: + question = QuestionORM( + id="q1", + domanda="2+2?", + risposta_attesa="4", + categoria="math", + ) + qset = QuestionSetORM(id="s1", name="Sample Set", questions=[question]) + result = ResultORM( + id="r1", + set_id="s1", + timestamp="2024-01-01T00:00:00", + results={"q1": "4"}, + ) + preset = APIPresetORM( + id="a1", + name="default", + provider_name="openai", + endpoint="http://api.example", + api_key="secret", + model="gpt", + temperature=0.1, + max_tokens=10, + ) + session.add_all([qset, result, preset]) + session.commit() + + assert session.get(QuestionORM, "q1").domanda == "2+2?" 
+ assert session.get(QuestionSetORM, "s1").questions[0].id == "q1" + assert session.get(ResultORM, "r1").results == {"q1": "4"} + assert session.get(APIPresetORM, "a1").model == "gpt" + + # Column names + assert set(QuestionORM.__table__.columns.keys()) == { + "id", + "domanda", + "risposta_attesa", + "categoria", + } + assert set(QuestionSetORM.__table__.columns.keys()) == {"id", "name"} + assert set(ResultORM.__table__.columns.keys()) == { + "id", + "set_id", + "timestamp", + "results", + } + assert set(APIPresetORM.__table__.columns.keys()) == { + "id", + "name", + "provider_name", + "endpoint", + "api_key", + "model", + "temperature", + "max_tokens", + } + assert set(question_set_questions.c.keys()) == {"set_id", "question_id"} + + # Foreign keys + fk_set = list(question_set_questions.c.set_id.foreign_keys)[0] + fk_question = list(question_set_questions.c.question_id.foreign_keys)[0] + assert fk_set.column.table.name == "question_sets" + assert fk_question.column.table.name == "questions" + + # Metadata consistency + for name, table in [ + ("questions", QuestionORM.__table__), + ("question_sets", QuestionSetORM.__table__), + ("test_results", ResultORM.__table__), + ("api_presets", APIPresetORM.__table__), + ("question_set_questions", question_set_questions), + ]: + assert name in Base.metadata.tables + assert Base.metadata.tables[name] is table + diff --git a/tests/test_models_question.py b/tests/test_models_question.py new file mode 100644 index 0000000..19b4f1d --- /dev/null +++ b/tests/test_models_question.py @@ -0,0 +1,33 @@ +import pandas as pd + +from models.question import Question +from models.orm_models import QuestionORM +from models.database import DatabaseEngine + + +def test_add_and_update_question(in_memory_db): + qid = Question.add('d1', 'r1', 'c1') + with DatabaseEngine.instance().get_session() as session: + q = session.get(QuestionORM, qid) + assert q.domanda == 'd1' + assert Question.update(qid, domanda='d2', categoria='c2') is True + with DatabaseEngine.instance().get_session() as session: + q = session.get(QuestionORM, qid) + assert q.domanda == 'd2' + assert q.categoria == 'c2' + assert Question.update('missing', domanda='x') is False + + +def test_persist_entities_handles_duplicates(in_memory_db): + existing_id = Question.add('d', 'r', 'c') + df = pd.DataFrame([ + {'id': existing_id, 'domanda': 'd', 'risposta_attesa': 'r', 'categoria': 'c'}, + {'id': 'new1', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': 'c2'}, + {'id': 'new1', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': 'c2'}, + ]) + count, warnings = Question._persist_entities(df) + assert count == 1 + assert len(warnings) == 1 + assert 'già esistente' in warnings[0] + with DatabaseEngine.instance().get_session() as session: + assert session.get(QuestionORM, 'new1') is not None diff --git a/tests/test_models_question_set.py b/tests/test_models_question_set.py new file mode 100644 index 0000000..4b4cb78 --- /dev/null +++ b/tests/test_models_question_set.py @@ -0,0 +1,43 @@ +import pandas as pd + +from models.question import Question +from models.question_set import QuestionSet +from models.orm_models import QuestionSetORM +from models.database import DatabaseEngine + + +def test_create_and_update_question_set(in_memory_db): + qid1 = Question.add('d1', 'r1') + qid2 = Question.add('d2', 'r2') + set_id = QuestionSet.create('set1', [qid1]) + with DatabaseEngine.instance().get_session() as session: + qset = session.get(QuestionSetORM, set_id) + assert qset.name == 'set1' + assert [q.id for q in 
qset.questions] == [qid1] + QuestionSet.update(set_id, name='set2', question_ids=[qid2]) + with DatabaseEngine.instance().get_session() as session: + qset = session.get(QuestionSetORM, set_id) + assert qset.name == 'set2' + assert [q.id for q in qset.questions] == [qid2] + # update of missing set should not raise + QuestionSet.update('missing', name='x') + + +def test_resolve_question_ids(monkeypatch, in_memory_db): + current_questions = pd.DataFrame([ + {'id': '1', 'domanda': 'd1', 'risposta_attesa': 'r1', 'categoria': ''} + ]) + data = ['1', {'id': '2', 'domanda': 'd2', 'risposta_attesa': 'r2', 'categoria': ''}, {'id': '3'}] + monkeypatch.setattr( + 'controllers.question_controller.add_question_if_not_exists', + lambda **kwargs: True, + ) + ids, updated, new_added, existing_found, warnings = QuestionSet._resolve_question_ids( + data, current_questions + ) + assert ids == ['1', '2'] + assert new_added == 1 + assert existing_found == 1 + assert len(warnings) == 1 + assert 'saltata' in warnings[0] + assert '2' in updated['id'].values diff --git a/tests/test_models_test_result.py b/tests/test_models_test_result.py new file mode 100644 index 0000000..d28cae8 --- /dev/null +++ b/tests/test_models_test_result.py @@ -0,0 +1,35 @@ +import pandas as pd + +from models.test_result import TestResult +from models.orm_models import TestResultORM +from models.database import DatabaseEngine + + +def test_add_and_persist_entities(in_memory_db): + TestResult.load_all_df.cache_clear() + existing_id = TestResult.add('set1', {'timestamp': 't1'}) + TestResult.load_all_df.cache_clear() + df = pd.DataFrame([ + {'id': existing_id, 'set_id': 'set1', 'timestamp': 't1', 'results': {}}, + {'id': 'new', 'set_id': 'set2', 'timestamp': 't2', 'results': {}}, + ]) + added = TestResult._persist_entities(df) + assert added == 1 + with DatabaseEngine.instance().get_session() as session: + assert session.get(TestResultORM, 'new') is not None + + +def test_calculate_statistics(): + data = { + 'q1': {'question': 'Q1', 'evaluation': {'score': 1, 'similarity': 2, 'correctness': 3, 'completeness': 4}}, + 'q2': {'question': 'Q2', 'evaluation': {'score': 3, 'similarity': 6, 'correctness': 9, 'completeness': 12}}, + } + stats = TestResult.calculate_statistics(data) + assert stats['avg_score'] == 2 + assert stats['radar_metrics']['similarity'] == 4 + assert len(stats['per_question_scores']) == 2 + assert TestResult.calculate_statistics({}) == { + 'avg_score': 0, + 'per_question_scores': [], + 'radar_metrics': {'similarity': 0, 'correctness': 0, 'completeness': 0}, + } diff --git a/tests/test_openai_client.py b/tests/test_openai_client.py new file mode 100644 index 0000000..2d5675d --- /dev/null +++ b/tests/test_openai_client.py @@ -0,0 +1,60 @@ +import logging +import os +import sys +from types import SimpleNamespace + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from utils.openai_client import ( # noqa: E402 + DEFAULT_MODEL, + get_available_models_for_endpoint, + get_openai_client, +) + + +def test_get_openai_client_no_api_key(caplog): + caplog.set_level(logging.WARNING) + client = get_openai_client("") + assert client is None + assert "Tentativo di creare client OpenAI senza chiave API." 
in caplog.text + + +def test_get_openai_client_uses_custom_base_url(mocker): + mock_openai = mocker.patch("utils.openai_client.OpenAI") + mock_client = mocker.MagicMock() + mock_openai.return_value = mock_client + + result = get_openai_client("key", base_url="http://custom") + + mock_openai.assert_called_once_with(api_key="key", base_url="http://custom") + assert result is mock_client + + +def test_get_available_models_returns_error_when_no_client(mocker): + mocker.patch("utils.openai_client.get_openai_client", return_value=None) + models = get_available_models_for_endpoint( + "Personalizzato", endpoint_url="http://endpoint", api_key="key" + ) + assert models[0] == "(Errore creazione client API)" + assert DEFAULT_MODEL in models + + +def test_get_available_models_filters_embeddings(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + dummy_models = [ + SimpleNamespace(id="gpt-4o"), + SimpleNamespace(id="text-embedding-3-small"), + SimpleNamespace(id="chat-model"), + SimpleNamespace(id="my-embedding-model"), + ] + + dummy_client = SimpleNamespace(models=SimpleNamespace(list=lambda: dummy_models)) + mock_get_client.return_value = dummy_client + + models = get_available_models_for_endpoint( + "Personalizzato", endpoint_url="http://endpoint", api_key="key" + ) + + assert "text-embedding-3-small" not in models + assert "my-embedding-model" not in models + assert "gpt-4o" in models and "chat-model" in models diff --git a/tests/test_openai_controllers.py b/tests/test_openai_controllers.py index 896a64d..e31d187 100644 --- a/tests/test_openai_controllers.py +++ b/tests/test_openai_controllers.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import Mock, patch import pytest @@ -11,47 +10,49 @@ from controllers.test_controller import generate_answer # noqa: E402 -def _mock_response(content: str): +def _mock_response(mocker, content: str): """Crea una risposta simulata con il contenuto fornito.""" - mock_resp = Mock() - mock_choice = Mock() - mock_choice.message = Mock() + mock_resp = mocker.Mock() + mock_choice = mocker.Mock() + mock_choice.message = mocker.Mock() mock_choice.message.content = content mock_resp.choices = [mock_choice] return mock_resp -@patch("utils.openai_client.get_openai_client") -def test_generate_answer_success(mock_get_client): - mock_client = Mock() +def test_generate_answer_success(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response(" answer ") + mock_client.chat.completions.create.return_value = _mock_response( + mocker, " answer " + ) result = generate_answer("question", {"api_key": "key"}) assert result == "answer" -@patch("utils.openai_client.get_openai_client", return_value=None) -def test_generate_answer_no_client(mock_get_client): +def test_generate_answer_no_client(mocker): + mocker.patch("utils.openai_client.get_openai_client", return_value=None) with pytest.raises(ValueError): generate_answer("question", {"api_key": None}) -@patch("utils.openai_client.get_openai_client") -def test_generate_answer_empty_question(mock_get_client): - mock_get_client.return_value = Mock() +def test_generate_answer_empty_question(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_get_client.return_value = mocker.Mock() with pytest.raises(ValueError): generate_answer("", {"api_key": "key"}) -@patch("utils.openai_client.get_openai_client") 
-def test_test_api_connection_success(mock_get_client): - mock_client = Mock() +def test_test_api_connection_success(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client mock_client.chat.completions.create.return_value = _mock_response( - "Connessione riuscita." + mocker, "Connessione riuscita." ) ok, msg = api_preset_controller.test_api_connection( @@ -62,11 +63,13 @@ def test_test_api_connection_success(mock_get_client): assert msg == "Connessione API riuscita!" -@patch("utils.openai_client.get_openai_client") -def test_test_api_connection_unexpected_response(mock_get_client): - mock_client = Mock() +def test_test_api_connection_unexpected_response(mocker): + mock_get_client = mocker.patch("utils.openai_client.get_openai_client") + mock_client = mocker.Mock() mock_get_client.return_value = mock_client - mock_client.chat.completions.create.return_value = _mock_response("failure") + mock_client.chat.completions.create.return_value = _mock_response( + mocker, "failure" + ) ok, msg = api_preset_controller.test_api_connection( "key", "endpoint", "model", 0.1, 10 @@ -76,8 +79,8 @@ def test_test_api_connection_unexpected_response(mock_get_client): assert "Risposta inattesa" in msg -@patch("utils.openai_client.get_openai_client", return_value=None) -def test_test_api_connection_no_client(mock_get_client): +def test_test_api_connection_no_client(mocker): + mocker.patch("utils.openai_client.get_openai_client", return_value=None) ok, msg = api_preset_controller.test_api_connection( "key", "endpoint", "model", 0.1, 10 ) diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index 287ac39..f318dad 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import patch import pandas as pd import pytest @@ -10,12 +9,12 @@ from controllers import question_controller # noqa: E402 -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.add") -@patch("controllers.question_controller.load_questions") -def test_add_question_if_not_exists_existing( - mock_load_questions, mock_add, mock_refresh -): +def test_add_question_if_not_exists_existing(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_add = mocker.patch("controllers.question_controller.Question.add") + mock_load_questions = mocker.patch( + "controllers.question_controller.load_questions" + ) mock_load_questions.return_value = pd.DataFrame({"id": ["123"]}) result = question_controller.add_question_if_not_exists( @@ -30,10 +29,12 @@ def test_add_question_if_not_exists_existing( mock_refresh.assert_not_called() -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.add") -@patch("controllers.question_controller.load_questions") -def test_add_question_if_not_exists_new(mock_load_questions, mock_add, mock_refresh): +def test_add_question_if_not_exists_new(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_add = mocker.patch("controllers.question_controller.Question.add") + mock_load_questions = mocker.patch( + "controllers.question_controller.load_questions" + ) mock_load_questions.return_value = pd.DataFrame({"id": ["456"]}) result = question_controller.add_question_if_not_exists( @@ -48,9 +49,9 @@ def 
test_add_question_if_not_exists_new(mock_load_questions, mock_add, mock_refr mock_refresh.assert_called_once() -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.add") -def test_add_question(mock_add, mock_refresh): +def test_add_question(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_add = mocker.patch("controllers.question_controller.Question.add") mock_add.return_value = "qid" result = question_controller.add_question("dom", "ans", "cat", "qid") @@ -60,9 +61,9 @@ def test_add_question(mock_add, mock_refresh): mock_refresh.assert_called_once() -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.update") -def test_update_question(mock_update, mock_refresh): +def test_update_question(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_update = mocker.patch("controllers.question_controller.Question.update") mock_update.return_value = True result = question_controller.update_question("qid", "dom", "ans", "cat") @@ -72,15 +73,17 @@ def test_update_question(mock_update, mock_refresh): mock_refresh.assert_called_once() -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.delete") -def test_delete_question(mock_delete, mock_refresh): +def test_delete_question(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_delete = mocker.patch("controllers.question_controller.Question.delete") question_controller.delete_question("qid") mock_delete.assert_called_once_with("qid") mock_refresh.assert_called_once() -@patch("controllers.question_controller.Question.filter_by_category") -def test_get_filtered_questions(mock_filter): +def test_get_filtered_questions(mocker): + mock_filter = mocker.patch( + "controllers.question_controller.Question.filter_by_category" + ) df = pd.DataFrame( { "id": ["1"], @@ -97,8 +100,8 @@ def test_get_filtered_questions(mock_filter): assert questions["id"].tolist() == ["1"] -@patch("utils.cache.get_questions") -def test_filter_by_category(mock_get_questions): +def test_filter_by_category(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") mock_get_questions.return_value = pd.DataFrame( { "id": ["1", "2"], @@ -113,8 +116,8 @@ def test_filter_by_category(mock_get_questions): assert filtered_df["id"].tolist() == ["1"] -@patch("utils.cache.get_questions") -def test_filter_by_category_no_category_column(mock_get_questions): +def test_filter_by_category_no_category_column(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") mock_get_questions.return_value = pd.DataFrame( { "id": ["1"], @@ -129,8 +132,8 @@ def test_filter_by_category_no_category_column(mock_get_questions): assert categories == ["N/A"] -@patch("utils.cache.get_questions") -def test_filter_by_category_empty_df(mock_get_questions): +def test_filter_by_category_empty_df(mocker): + mock_get_questions = mocker.patch("utils.cache.get_questions") mock_get_questions.return_value = pd.DataFrame() filtered_df, categories = question_controller.Question.filter_by_category() @@ -138,18 +141,18 @@ def test_filter_by_category_empty_df(mock_get_questions): assert categories == [] -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.load_questions") -def test_get_question_text_found(mock_load, mock_refresh): +def 
test_get_question_text_found(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") mock_load.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) text = question_controller.get_question_text("1") mock_refresh.assert_not_called() assert text == "Q1" -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.load_questions") -def test_get_question_text_refresh(mock_load, mock_refresh): +def test_get_question_text_refresh(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") mock_load.return_value = pd.DataFrame({"id": ["1"]}) mock_refresh.return_value = pd.DataFrame({"id": ["1"], "domanda": ["Q1"]}) text = question_controller.get_question_text("1") @@ -157,18 +160,18 @@ def test_get_question_text_refresh(mock_load, mock_refresh): assert text == "Q1" -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.load_questions") -def test_get_question_category_found(mock_load, mock_refresh): +def test_get_question_category_found(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") mock_load.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) cat = question_controller.get_question_category("1") mock_refresh.assert_not_called() assert cat == "C1" -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.load_questions") -def test_get_question_category_refresh(mock_load, mock_refresh): +def test_get_question_category_refresh(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_load = mocker.patch("controllers.question_controller.load_questions") mock_load.return_value = pd.DataFrame({"id": ["1"]}) mock_refresh.return_value = pd.DataFrame({"id": ["1"], "categoria": ["C1"]}) cat = question_controller.get_question_category("1") @@ -176,9 +179,9 @@ def test_get_question_category_refresh(mock_load, mock_refresh): assert cat == "C1" -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.update_question") -def test_save_question_action_success(mock_update, mock_refresh): +def test_save_question_action_success(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_update = mocker.patch("controllers.question_controller.update_question") mock_update.return_value = True df = pd.DataFrame({"id": ["1"]}) mock_refresh.return_value = df @@ -193,9 +196,9 @@ def test_save_question_action_success(mock_update, mock_refresh): assert result["questions_df"].equals(df) -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.update_question") -def test_save_question_action_failure(mock_update, mock_refresh): +def test_save_question_action_failure(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_update = mocker.patch("controllers.question_controller.update_question") mock_update.return_value = False result = question_controller.save_question_action("1", "q", "a", "c") @@ -204,9 +207,9 @@ def test_save_question_action_failure(mock_update, mock_refresh): assert result["questions_df"] is None 
-@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.delete_question") -def test_delete_question_action(mock_delete, mock_refresh): +def test_delete_question_action(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_delete = mocker.patch("controllers.question_controller.delete_question") df = pd.DataFrame() mock_refresh.return_value = df @@ -217,9 +220,11 @@ def test_delete_question_action(mock_delete, mock_refresh): assert result.equals(df) -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.import_from_file") -def test_import_questions_action_success(mock_import, mock_refresh): +def test_import_questions_action_success(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_import = mocker.patch( + "controllers.question_controller.Question.import_from_file" + ) mock_import.return_value = { "success": True, "imported_count": 1, @@ -243,9 +248,11 @@ def test_import_questions_action_no_file(): question_controller.import_questions_action(None) -@patch("controllers.question_controller.refresh_questions") -@patch("controllers.question_controller.Question.import_from_file") -def test_import_questions_action_failure(mock_import, mock_refresh): +def test_import_questions_action_failure(mocker): + mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") + mock_import = mocker.patch( + "controllers.question_controller.Question.import_from_file" + ) mock_import.return_value = { "success": False, "imported_count": 0, diff --git a/tests/test_question_import.py b/tests/test_question_import.py index 2378698..9b90df9 100644 --- a/tests/test_question_import.py +++ b/tests/test_question_import.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import patch sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -41,9 +40,9 @@ def get_session(self): return self.session -@patch("models.question.DatabaseEngine.instance") -def test_import_from_file_skips_duplicates_and_adds_new(mock_engine): +def test_import_from_file_skips_duplicates_and_adds_new(mocker): engine = DummyEngine() + mock_engine = mocker.patch("models.question.DatabaseEngine.instance") mock_engine.return_value = engine data_dir = os.path.join(os.path.dirname(__file__), "sample_data") diff --git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py index 60fdf78..e38a9ba 100644 --- a/tests/test_question_set_controller.py +++ b/tests/test_question_set_controller.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import patch import pandas as pd sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -8,9 +7,13 @@ from controllers import question_set_controller # noqa: E402 -@patch("controllers.question_set_controller.refresh_question_sets") -@patch("controllers.question_set_controller.QuestionSet.create") -def test_create_set_controller(mock_create, mock_refresh): +def test_create_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_create = mocker.patch( + "controllers.question_set_controller.QuestionSet.create" + ) mock_create.return_value = "sid" result = question_set_controller.create_set("name", ["q1"]) @@ -20,26 +23,39 @@ def test_create_set_controller(mock_create, mock_refresh): mock_refresh.assert_called_once() -@patch("controllers.question_set_controller.refresh_question_sets") 
-@patch("controllers.question_set_controller.QuestionSet.update") -def test_update_set_controller(mock_update, mock_refresh): +def test_update_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_update = mocker.patch( + "controllers.question_set_controller.QuestionSet.update" + ) question_set_controller.update_set("sid", name="name", question_ids=["q1"]) mock_update.assert_called_once_with("sid", "name", ["q1"]) mock_refresh.assert_called_once() -@patch("controllers.question_set_controller.refresh_question_sets") -@patch("controllers.question_set_controller.QuestionSet.delete") -def test_delete_set_controller(mock_delete, mock_refresh): +def test_delete_set_controller(mocker): + mock_refresh = mocker.patch( + "controllers.question_set_controller.refresh_question_sets" + ) + mock_delete = mocker.patch( + "controllers.question_set_controller.QuestionSet.delete" + ) question_set_controller.delete_set("sid") mock_delete.assert_called_once_with("sid") mock_refresh.assert_called_once() -@patch("controllers.question_set_controller._get_question_sets") -@patch("controllers.question_set_controller._get_questions") -def test_prepare_sets_for_view(mock_get_questions, mock_get_sets): + +def test_prepare_sets_for_view(mocker): + mock_get_sets = mocker.patch( + "controllers.question_set_controller._get_question_sets" + ) + mock_get_questions = mocker.patch( + "controllers.question_set_controller._get_questions" + ) questions_df = pd.DataFrame( { "id": ["1", "2"], diff --git a/tests/test_question_set_import.py b/tests/test_question_set_import.py index eec3e91..8be591f 100644 --- a/tests/test_question_set_import.py +++ b/tests/test_question_set_import.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import patch import pandas as pd import pytest @@ -14,19 +13,20 @@ @pytest.mark.parametrize("filename", ["question_sets.json", "question_sets.csv"]) -@patch("controllers.question_controller.add_question_if_not_exists") -@patch("models.question_set.QuestionSet.create") -@patch("controllers.question_set_controller.load_sets") -@patch("controllers.question_controller.load_questions") -@patch("utils.cache.refresh_question_sets", return_value=pd.DataFrame()) -def test_import_from_file_handles_duplicates( - mock_refresh, - mock_load_questions, - mock_load_sets, - mock_create, - mock_add_question, - filename, -): +def test_import_from_file_handles_duplicates(mocker, filename): + mock_refresh = mocker.patch( + "utils.cache.refresh_question_sets", return_value=pd.DataFrame() + ) + mock_load_questions = mocker.patch( + "controllers.question_controller.load_questions" + ) + mock_load_sets = mocker.patch( + "controllers.question_set_controller.load_sets" + ) + mock_create = mocker.patch("models.question_set.QuestionSet.create") + mock_add_question = mocker.patch( + "controllers.question_controller.add_question_if_not_exists" + ) mock_load_questions.return_value = pd.DataFrame( {"id": ["q1"], "domanda": ["Existing"], "risposta_attesa": ["A1"], "categoria": ["cat1"]} ) diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py index dad79bd..3eefb94 100644 --- a/tests/test_question_set_importer.py +++ b/tests/test_question_set_importer.py @@ -1,6 +1,5 @@ import io import json -from unittest.mock import patch import os import sys @@ -32,8 +31,10 @@ def test_read_question_sets_json_not_list(): read_question_sets(file) -@patch("controllers.question_controller.add_question_if_not_exists") -def 
test_resolve_question_ids_adds_and_existing(mock_add): +def test_resolve_question_ids_adds_and_existing(mocker): + mock_add = mocker.patch( + "controllers.question_controller.add_question_if_not_exists" + ) mock_add.return_value = True current_questions = pd.DataFrame( [{"id": "2", "domanda": "", "risposta_attesa": "", "categoria": ""}] @@ -76,9 +77,9 @@ def test_resolve_question_ids_missing_id(): assert updated_df.empty -@patch("utils.cache.refresh_question_sets") -@patch("models.question_set.QuestionSet.create") -def test_persist_sets_skips_duplicates(mock_create, mock_refresh): +def test_persist_sets_skips_duplicates(mocker): + mock_refresh = mocker.patch("utils.cache.refresh_question_sets") + mock_create = mocker.patch("models.question_set.QuestionSet.create") mock_refresh.return_value = pd.DataFrame( [{"id": "s1", "name": "Existing", "questions": []}] ) diff --git a/tests/test_result_controller.py b/tests/test_result_controller.py new file mode 100644 index 0000000..0ae0a2e --- /dev/null +++ b/tests/test_result_controller.py @@ -0,0 +1,92 @@ +import os +import sys + +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import result_controller as controller # noqa: E402 + + +def sample_data(): + results_df = pd.DataFrame( + [ + { + "id": "1", + "set_id": "10", + "timestamp": "2024-01-01", + "results": { + "generation_llm": "gpt-3.5", + "avg_score": 80, + "method": "LLM", + }, + }, + { + "id": "2", + "set_id": "20", + "timestamp": "2024-01-02", + "results": { + "generation_preset": "presetA", + "avg_score": 70, + "method": "LLM", + }, + }, + { + "id": "3", + "set_id": "10", + "timestamp": "2024-01-03", + "results": { + "generation_llm": "gpt-4", + "avg_score": 90, + "method": "LLM", + }, + }, + ] + ) + sets_df = pd.DataFrame( + [ + {"id": "10", "name": "Set1"}, + {"id": "20", "name": "Set2"}, + ] + ) + presets_df = pd.DataFrame( + [ + {"name": "presetA", "model": "gpt-3.5"}, + ] + ) + return results_df, sets_df, presets_df + + +def test_get_results_filters(mocker): + results_df, sets_df, presets_df = sample_data() + mocker.patch("controllers.result_controller.load_results", return_value=results_df) + mocker.patch("controllers.result_controller.load_sets", return_value=sets_df) + mocker.patch("controllers.result_controller.load_presets", return_value=presets_df) + + df_set = controller.get_results("Set1", None) + assert set(df_set["id"]) == {"1", "3"} + + df_model = controller.get_results(None, "gpt-3.5") + assert set(df_model["id"]) == {"1", "2"} + + +def test_list_names(mocker): + results_df, sets_df, presets_df = sample_data() + mocker.patch("controllers.result_controller.load_presets", return_value=presets_df) + + set_names = controller.list_set_names(results_df, sets_df) + assert set_names == ["Set1", "Set2"] + + model_names = controller.list_model_names(results_df) + assert model_names == ["gpt-3.5", "gpt-4"] + + +def test_prepare_select_options(): + results_df, sets_df, _ = sample_data() + options = controller.prepare_select_options(results_df, sets_df) + expected = { + "3": "2024-01-03 - 🤖 Set1 (Avg: 90.00%) - LLM", + "2": "2024-01-02 - 🤖 Set2 (Avg: 70.00%) - LLM", + "1": "2024-01-01 - 🤖 Set1 (Avg: 80.00%) - LLM", + } + assert options == expected diff --git a/tests/test_session_state.py b/tests/test_session_state.py new file mode 100644 index 0000000..7bbcd49 --- /dev/null +++ b/tests/test_session_state.py @@ -0,0 +1,28 @@ +import pytest + +from views import session_state as ss + + +def 
test_initialize_session_state_writes_required_keys(monkeypatch): + fake_defaults = { + "questions": [], + "question_sets": [], + "results": [], + "api_key": "key", + "endpoint": "https://example.com", + "model": "gpt-4", + "temperature": 0.5, + "max_tokens": 1000, + } + monkeypatch.setattr(ss, "get_initial_state", lambda: fake_defaults) + monkeypatch.setattr(ss.st, "session_state", {}) + ss.initialize_session_state() + for key, value in fake_defaults.items(): + assert ss.st.session_state[key] == value + + +def test_ensure_keys_respects_existing(monkeypatch): + monkeypatch.setattr(ss.st, "session_state", {"existing": 1}) + ss.ensure_keys({"existing": 2, "missing": 3}) + assert ss.st.session_state["existing"] == 1 + assert ss.st.session_state["missing"] == 3 diff --git a/tests/test_set_helpers.py b/tests/test_set_helpers.py index b476ecf..2463f12 100644 --- a/tests/test_set_helpers.py +++ b/tests/test_set_helpers.py @@ -1,76 +1,148 @@ +import os +import sys + import pandas as pd +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + from views.state_models import SetPageState from models.question_set import PersistSetsResult -# create dummy st object class DummySessionState(dict): def __getattr__(self, name): return self.get(name) + def __setattr__(self, name, value): self[name] = value + class DummySt: def __init__(self): self.session_state = DummySessionState() - self.captured_warnings = [] + self.captured_warnings: list[str] = [] + def warning(self, msg): self.captured_warnings.append(msg) -def test_import_set_callback_message_and_warnings(monkeypatch): +def _setup(monkeypatch): from views import set_helpers dummy_st = DummySt() - dummy_st.session_state.uploaded_file_content_set = object() monkeypatch.setattr(set_helpers, "st", dummy_st) + return set_helpers, dummy_st - result = PersistSetsResult( - sets_df=pd.DataFrame(), - questions_df=pd.DataFrame(), - sets_imported_count=2, - new_questions_added_count=1, - existing_questions_found_count=0, - warnings=["w1", "w2"], - ) - monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _: result) + +def test_save_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + sets_df = pd.DataFrame({"id": [1]}) + + def fake_update_set(_set_id, _name, _ids): + return sets_df, "ok" + + monkeypatch.setattr(set_helpers, "update_set", fake_update_set) state = SetPageState() - set_helpers.import_set_callback(state) + set_helpers.save_set_callback("1", "name", {}, [], state) - assert state.import_set_success is True - assert state.import_set_success_message == "2 set importati. 1 nuove domande aggiunte." 
- assert dummy_st.captured_warnings == ["w1", "w2"] - assert isinstance(dummy_st.session_state.questions, pd.DataFrame) + assert state.save_set_success is True + assert state.save_set_success_message == "ok" assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) - assert dummy_st.session_state.uploaded_file_content_set is None -def test_import_set_callback_no_imports_with_warnings(monkeypatch): - from views import set_helpers +def test_save_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) - dummy_st = DummySt() + def fake_update_set(*_args, **_kwargs): + raise Exception("boom") + + monkeypatch.setattr(set_helpers, "update_set", fake_update_set) + + state = SetPageState() + set_helpers.save_set_callback("1", "name", {}, [], state) + + assert state.save_set_error is True + assert state.save_set_error_message == "boom" + assert "question_sets" not in dummy_st.session_state + + +def test_delete_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + sets_df = pd.DataFrame({"id": [1]}) + + def fake_delete_set(_set_id): + return sets_df, "deleted" + + monkeypatch.setattr(set_helpers, "delete_set", fake_delete_set) + + state = SetPageState() + set_helpers.delete_set_callback("1", state) + + assert state.delete_set_success is True + assert state.delete_set_success_message == "deleted" + assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + + +def test_delete_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + + def fake_delete_set(_set_id): + raise Exception("fail") + + monkeypatch.setattr(set_helpers, "delete_set", fake_delete_set) + + state = SetPageState() + set_helpers.delete_set_callback("1", state) + + assert state.save_set_error is True + assert state.save_set_error_message == "fail" + assert "question_sets" not in dummy_st.session_state + + +def test_import_set_callback_success(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) dummy_st.session_state.uploaded_file_content_set = object() - monkeypatch.setattr(set_helpers, "st", dummy_st) result = PersistSetsResult( - sets_df=pd.DataFrame(), - questions_df=pd.DataFrame(), - sets_imported_count=0, + sets_df=pd.DataFrame({"id": [1]}), + questions_df=pd.DataFrame({"id": [2]}), + sets_imported_count=1, new_questions_added_count=0, existing_questions_found_count=0, warnings=["warn"], ) - monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _: result) + + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", lambda _f: result) state = SetPageState() set_helpers.import_set_callback(state) assert state.import_set_success is True - assert ( - state.import_set_success_message - == "Nessun set importato. Controlla gli avvisi." - ) + assert state.import_set_success_message == "1 set importati." 
+ assert isinstance(dummy_st.session_state.question_sets, pd.DataFrame) + assert isinstance(dummy_st.session_state.questions, pd.DataFrame) assert dummy_st.captured_warnings == ["warn"] assert dummy_st.session_state.uploaded_file_content_set is None + + +def test_import_set_callback_error(monkeypatch): + set_helpers, dummy_st = _setup(monkeypatch) + dummy_st.session_state.uploaded_file_content_set = object() + dummy_st.session_state.upload_set_file = object() + + def fake_import_from_file(_f): + raise Exception("bad") + + monkeypatch.setattr(set_helpers.QuestionSet, "import_from_file", fake_import_from_file) + + state = SetPageState() + set_helpers.import_set_callback(state) + + assert state.import_set_error is True + assert state.import_set_error_message == "bad" + assert dummy_st.session_state.uploaded_file_content_set is None + assert "upload_set_file" not in dummy_st.session_state + diff --git a/tests/test_startup_controller.py b/tests/test_startup_controller.py new file mode 100644 index 0000000..e18cc3b --- /dev/null +++ b/tests/test_startup_controller.py @@ -0,0 +1,54 @@ +import os +import sys + +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers import startup_controller as controller # noqa: E402 + + +def test_get_initial_state(monkeypatch): + calls = [] + + def mock_initialize_database(): + calls.append("init") + + def mock_load_default_config(): + calls.append("defaults") + return {"conf": "value"} + + def mock_get_questions(): + calls.append("questions") + return ["q1"] + + def mock_get_question_sets(): + calls.append("question_sets") + return ["qs1"] + + def mock_get_results(): + calls.append("results") + return ["r1"] + + monkeypatch.setattr(controller, "initialize_database", mock_initialize_database) + monkeypatch.setattr(controller, "load_default_config", mock_load_default_config) + monkeypatch.setattr(controller, "get_questions", mock_get_questions) + monkeypatch.setattr(controller, "get_question_sets", mock_get_question_sets) + monkeypatch.setattr(controller, "get_results", mock_get_results) + + state = controller.get_initial_state() + + assert state == { + "questions": ["q1"], + "question_sets": ["qs1"], + "results": ["r1"], + "conf": "value", + } + + assert calls == [ + "init", + "defaults", + "questions", + "question_sets", + "results", + ] diff --git a/tests/test_startup_utils.py b/tests/test_startup_utils.py new file mode 100644 index 0000000..993460b --- /dev/null +++ b/tests/test_startup_utils.py @@ -0,0 +1,39 @@ +import logging + +from utils.startup_utils import setup_logging, initialize_database, load_default_config +from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT + + +def test_setup_logging_creates_file(tmp_path): + log_file = tmp_path / "app.log" + root_logger = logging.getLogger() + root_logger.handlers.clear() + setup_logging(log_file=log_file) + logging.getLogger().info("hello") + assert log_file.exists() + assert "hello" in log_file.read_text() + + +def test_initialize_database_calls_init_db(monkeypatch, mocker): + dummy_engine = mocker.MagicMock() + monkeypatch.setattr( + initialize_database.__globals__["DatabaseEngine"], + "instance", + classmethod(lambda cls: dummy_engine), + ) + initialize_database() + dummy_engine.init_db.assert_called_once() + + +def test_load_default_config_returns_expected(monkeypatch, tmp_path): + db_cfg = tmp_path / "db.config" + db_cfg.write_text("[mysql]\nuser=u\npassword=p\nhost=h\ndatabase=d\n") + monkeypatch.setenv("OPENAI_API_KEY", "key") + config = 
load_default_config() + assert config == { + "api_key": "key", + "endpoint": DEFAULT_ENDPOINT, + "model": DEFAULT_MODEL, + "temperature": 0.0, + "max_tokens": 1000, + } diff --git a/tests/test_state_models.py b/tests/test_state_models.py new file mode 100644 index 0000000..72b9580 --- /dev/null +++ b/tests/test_state_models.py @@ -0,0 +1,21 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views.state_models import SetPageState, QuestionPageState + + +def test_set_page_state_defaults_and_mutability(): + state = SetPageState() + assert state.save_set_success is False + assert state.save_set_success_message == 'Set aggiornato con successo!' + state.save_set_success = True + assert state.save_set_success is True + + +def test_question_page_state_defaults_and_mutability(): + state = QuestionPageState() + assert state.save_success is False + assert state.delete_success_message == 'Domanda eliminata con successo!' + state.save_success = True + assert state.save_success is True diff --git a/tests/test_style_utils.py b/tests/test_style_utils.py new file mode 100644 index 0000000..44b8329 --- /dev/null +++ b/tests/test_style_utils.py @@ -0,0 +1,56 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import style_utils + + +class DummySt: + def __init__(self): + self.calls = [] + + def markdown(self, text, **kwargs): + self.calls.append(text) + + +def test_add_global_styles_injects_css(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_global_styles() + + assert any('stTextInput' in c for c in dummy_st.calls) + + +def test_add_page_header_calls_global_styles_and_renders(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + called = {'global': False} + + def fake_add_global_styles(): + called['global'] = True + + monkeypatch.setattr(style_utils, 'add_global_styles', fake_add_global_styles) + + style_utils.add_page_header('Titolo', icon='✨', description='desc') + + assert called['global'] is True + assert any('✨ Titolo' in c and 'desc' in c for c in dummy_st.calls) + + +def test_add_section_title_renders_text(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_section_title('Section', icon='➡') + + assert any('➡ Section' in c for c in dummy_st.calls) + + +def test_add_home_styles_injects_css(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(style_utils, 'st', dummy_st) + + style_utils.add_home_styles() + + assert any('feature-box' in c for c in dummy_st.calls) diff --git a/tests/test_test_controller.py b/tests/test_test_controller.py new file mode 100644 index 0000000..16e4a2b --- /dev/null +++ b/tests/test_test_controller.py @@ -0,0 +1,93 @@ +import os +import sys +from types import SimpleNamespace + +import pandas as pd +import pytest + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from controllers.test_controller import import_results_action, run_test + + +def test_import_results_action_no_file(mocker): + mock_import = mocker.patch( + "controllers.test_controller.TestResult.import_from_file" + ) + mock_load_results = mocker.patch("controllers.test_controller.load_results") + with pytest.raises(ValueError, match="Nessun file caricato"): + import_results_action(None) + mock_import.assert_not_called() + mock_load_results.assert_not_called() + + +def test_import_results_action_failure(mocker): + mock_import = mocker.patch( + 
"controllers.test_controller.TestResult.import_from_file" + ) + mock_load_results = mocker.patch("controllers.test_controller.load_results") + mock_import.return_value = (False, "errore") + with pytest.raises(ValueError, match="errore"): + import_results_action("dummy") + mock_load_results.assert_not_called() + + +def test_run_test_success(mocker): + mock_load_all = mocker.patch("controllers.test_controller.Question.load_all") + mock_gen = mocker.patch("controllers.test_controller.generate_answer") + mock_eval = mocker.patch("controllers.test_controller.evaluate_answer") + mock_add_refresh = mocker.patch( + "controllers.test_controller.TestResult.add_and_refresh", return_value="rid" + ) + mock_load_df = mocker.patch( + "controllers.test_controller.TestResult.load_all_df", + return_value=pd.DataFrame(), + ) + mock_load_all.return_value = [SimpleNamespace(id="1", domanda="Q", risposta_attesa="A")] + mock_gen.return_value = "Ans" + mock_eval.return_value = { + "score": 50, + "explanation": "ok", + "similarity": 50, + "correctness": 50, + "completeness": 50, + } + + res = run_test("set1", "name", ["1"], {}, {}) + + assert res["result_id"] == "rid" + assert res["avg_score"] == 50 + assert isinstance(res["results_df"], pd.DataFrame) + assert res["results"]["1"]["actual_answer"] == "Ans" + + +def test_run_test_generation_and_evaluation_errors(mocker): + mock_load_all = mocker.patch("controllers.test_controller.Question.load_all") + mock_gen = mocker.patch("controllers.test_controller.generate_answer") + mock_eval = mocker.patch("controllers.test_controller.evaluate_answer") + mock_add_refresh = mocker.patch( + "controllers.test_controller.TestResult.add_and_refresh", return_value="rid" + ) + mock_load_df = mocker.patch( + "controllers.test_controller.TestResult.load_all_df", + return_value=pd.DataFrame(), + ) + questions = [ + SimpleNamespace(id="1", domanda="Q1", risposta_attesa="A1"), + SimpleNamespace(id="2", domanda="Q2", risposta_attesa="A2"), + ] + mock_load_all.return_value = questions + mock_gen.side_effect = [Exception("gen fail"), "ans2"] + mock_eval.side_effect = [Exception("eval fail")] + + res = run_test("set1", "name", ["1", "2"], {}, {}) + + assert res["result_id"] == "rid" + assert res["avg_score"] == 0 + q1 = res["results"]["1"] + q2 = res["results"]["2"] + assert q1["actual_answer"] == "gen fail" + assert q1["evaluation"]["score"] == 0 + assert q2["actual_answer"] == "ans2" + assert q2["evaluation"]["score"] == 0 + assert isinstance(res["results_df"], pd.DataFrame) diff --git a/tests/test_ui_utils.py b/tests/test_ui_utils.py new file mode 100644 index 0000000..6b3e045 --- /dev/null +++ b/tests/test_ui_utils.py @@ -0,0 +1,21 @@ +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import ui_utils, style_utils, component_utils + + +def test_ui_utils_re_exports_functions(): + assert ui_utils.add_global_styles is style_utils.add_global_styles + assert ui_utils.add_page_header is style_utils.add_page_header + assert ui_utils.add_section_title is style_utils.add_section_title + assert ui_utils.create_card is component_utils.create_card + assert ui_utils.create_metrics_container is component_utils.create_metrics_container + expected_all = { + 'add_global_styles', + 'add_page_header', + 'add_section_title', + 'create_card', + 'create_metrics_container', + } + assert set(ui_utils.__all__) == expected_all diff --git a/tests/test_visualizza_risultati_view.py b/tests/test_visualizza_risultati_view.py new file mode 100644 index 0000000..138daa1 --- 
/dev/null +++ b/tests/test_visualizza_risultati_view.py @@ -0,0 +1,150 @@ +import os +import sys +import pandas as pd + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from views import visualizza_risultati + + +class DummySessionState(dict): + def __getattr__(self, name): + return self.get(name) + + def __setattr__(self, name, value): + self[name] = value + + +class DummyContext: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +class DummySt: + def __init__(self): + self.session_state = DummySessionState() + self.captured_callbacks = {} + + def success(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def write(self, *args, **kwargs): + pass + + def info(self, *args, **kwargs): + pass + + def markdown(self, *args, **kwargs): + pass + + def selectbox(self, label, options, index=0, **kwargs): + return options[index] + + def button(self, label, on_click=None, **kwargs): + if on_click: + self.captured_callbacks[label] = on_click + return False + + def file_uploader(self, *args, **kwargs): + return None + + def download_button(self, *args, **kwargs): + pass + + def expander(self, *args, **kwargs): + return DummyContext() + + def columns(self, n): + return (DummyContext(), DummyContext()) + + def stop(self): + pass + + +class StopRender(Exception): + pass + + +def _setup(monkeypatch): + dummy_st = DummySt() + monkeypatch.setattr(visualizza_risultati, "st", dummy_st) + monkeypatch.setattr(visualizza_risultati, "add_page_header", lambda *a, **k: None) + monkeypatch.setattr(visualizza_risultati.json, "dumps", lambda *a, **k: "{}") + + res_df = pd.DataFrame([ + {"id": 1, "set_id": 1, "timestamp": "t", "results": {"method": "LLM"}} + ]) + sets_df = pd.DataFrame([{"id": 1, "name": "s"}]) + + monkeypatch.setattr(visualizza_risultati, "get_results", lambda *_a, **_k: res_df) + monkeypatch.setattr(visualizza_risultati, "load_sets", lambda: sets_df) + monkeypatch.setattr(visualizza_risultati, "list_set_names", lambda *_a: ["s"]) + monkeypatch.setattr(visualizza_risultati, "list_model_names", lambda *_a: ["m"]) + monkeypatch.setattr( + visualizza_risultati, "prepare_select_options", lambda df, sets: {1: "r"} + ) + + def fake_add_section_title(*args, **kwargs): + raise StopRender() + + monkeypatch.setattr(visualizza_risultati, "add_section_title", fake_add_section_title) + + try: + visualizza_risultati.render() + except StopRender: + pass + + return dummy_st + + +def test_import_results_callback_success(monkeypatch): + dummy_st = _setup(monkeypatch) + callback = dummy_st.captured_callbacks.get("Importa Risultati") + assert callback is not None + + res_df = pd.DataFrame([{"id": 2, "set_id": 1, "timestamp": "t2", "results": {}}]) + + def fake_import_results_action(_file): + return res_df, "ok" + + monkeypatch.setattr(visualizza_risultati, "import_results_action", fake_import_results_action) + + dummy_st.session_state.uploaded_results_file = object() + callback() + + assert dummy_st.session_state.import_results_success is True + assert dummy_st.session_state.import_results_message == "ok" + assert dummy_st.session_state.import_results_error is False + assert dummy_st.session_state.uploaded_results_file is None + assert dummy_st.session_state.upload_results is None + assert isinstance(dummy_st.session_state.results, pd.DataFrame) + + +def test_import_results_callback_error(monkeypatch): + dummy_st = _setup(monkeypatch) + callback = 
dummy_st.captured_callbacks.get("Importa Risultati") + assert callback is not None + + def fake_import_results_action(_file): + raise Exception("fail") + + monkeypatch.setattr(visualizza_risultati, "import_results_action", fake_import_results_action) + + dummy_st.session_state.uploaded_results_file = object() + callback() + + assert dummy_st.session_state.import_results_error is True + assert dummy_st.session_state.import_results_message == "fail" + assert dummy_st.session_state.import_results_success is False + assert dummy_st.session_state.uploaded_results_file is None + assert dummy_st.session_state.upload_results is None + diff --git a/utils/file_reader_utils.py b/utils/file_reader_utils.py index ed9df84..f3a405a 100644 --- a/utils/file_reader_utils.py +++ b/utils/file_reader_utils.py @@ -2,7 +2,7 @@ import json import uuid from datetime import datetime -from typing import List, Dict, Any, Iterable, Tuple +from typing import IO, Any, Dict, Iterable, List, Tuple import pandas as pd @@ -43,7 +43,7 @@ def filter_new_rows(df: pd.DataFrame, existing_ids: Iterable[str]) -> Tuple[pd.D return filtered, int(mask.sum()) -def read_questions(file) -> pd.DataFrame: +def read_questions(file: IO[str] | IO[bytes]) -> pd.DataFrame: """Legge un file di domande (CSV o JSON) e restituisce un DataFrame normalizzato.""" if hasattr(file, "seek"): file.seek(0) @@ -99,7 +99,7 @@ def read_questions(file) -> pd.DataFrame: return df[["id", "domanda", "risposta_attesa", "categoria"]] -def read_question_sets(file) -> List[Dict[str, Any]]: +def read_question_sets(file: IO[str] | IO[bytes]) -> List[Dict[str, Any]]: """Legge un file di set di domande (CSV o JSON) e restituisce una lista di dizionari.""" if hasattr(file, "seek"): file.seek(0) @@ -154,7 +154,7 @@ def read_question_sets(file) -> List[Dict[str, Any]]: raise ValueError("Formato file non supportato. 
Caricare un file CSV o JSON.") -def read_test_results(file) -> pd.DataFrame: +def read_test_results(file: IO[str] | IO[bytes]) -> pd.DataFrame: """Legge un file di risultati di test (CSV o JSON) e restituisce un DataFrame normalizzato.""" if hasattr(file, "seek"): file.seek(0) diff --git a/utils/openai_client.py b/utils/openai_client.py index b716648..7b7a369 100644 --- a/utils/openai_client.py +++ b/utils/openai_client.py @@ -1,18 +1,17 @@ """Utility per interagire con le API dei provider LLM.""" -# mypy: ignore-errors - import logging +from typing import Any, List from openai import OpenAI -logger = logging.getLogger(__name__) -DEFAULT_MODEL = "gpt-4o" -DEFAULT_ENDPOINT = "https://api.openai.com/v1" +logger = logging.getLogger(__name__) +DEFAULT_MODEL: str = "gpt-4o" +DEFAULT_ENDPOINT: str = "https://api.openai.com/v1" -def get_openai_client(api_key: str, base_url: str | None = None): +def get_openai_client(api_key: str, base_url: str | None = None) -> OpenAI | None: """Crea e restituisce un client OpenAI configurato.""" if not api_key: @@ -20,7 +19,9 @@ def get_openai_client(api_key: str, base_url: str | None = None): return None try: effective_base_url = ( - base_url if base_url and base_url.strip() and base_url != "custom" else DEFAULT_ENDPOINT + base_url + if base_url and base_url.strip() and base_url != "custom" + else DEFAULT_ENDPOINT ) return OpenAI(api_key=api_key, base_url=effective_base_url) except Exception as exc: @@ -29,27 +30,41 @@ def get_openai_client(api_key: str, base_url: str | None = None): def get_available_models_for_endpoint( - provider_name: str, endpoint_url: str | None = None, api_key: str | None = None -): + provider_name: str, + endpoint_url: str | None = None, + api_key: str | None = None, +) -> List[str]: """Restituisce una lista di modelli disponibili basata sul provider o sull'endpoint.""" # Aggiungi altri provider predefiniti qui # elif provider_name == "XAI": # return XAI_MODELS if provider_name == "Personalizzato": - if not api_key or not endpoint_url or endpoint_url == "custom" or not endpoint_url.strip(): - return ["(Endpoint personalizzato non specificato)", DEFAULT_MODEL, "gpt-4", "gpt-3.5-turbo"] + if ( + not api_key + or not endpoint_url + or endpoint_url == "custom" + or not endpoint_url.strip() + ): + return [ + "(Endpoint personalizzato non specificato)", + DEFAULT_MODEL, + "gpt-4", + "gpt-3.5-turbo", + ] client = get_openai_client(api_key=api_key, base_url=endpoint_url) if not client: return ["(Errore creazione client API)", DEFAULT_MODEL] try: - models_response = client.models.list() - models = getattr(models_response, "data", models_response) - filtered_models = sorted( + models_response: Any = client.models.list() + models: Any = getattr(models_response, "data", models_response) + filtered_models: List[str] = sorted( [ model.id for model in models - if not any(term in model.id.lower() for term in ["embed", "embedding"]) + if not any( + term in model.id.lower() for term in ["embed", "embedding"] + ) and ( any( term in model.id.lower() @@ -64,7 +79,9 @@ def get_available_models_for_endpoint( [ model.id for model in models - if not any(term in model.id.lower() for term in ["embed", "embedding"]) + if not any( + term in model.id.lower() for term in ["embed", "embedding"] + ) ] ) return filtered_models if filtered_models else [DEFAULT_MODEL] diff --git a/utils/startup_utils.py b/utils/startup_utils.py index 5d95d7d..bbf77e7 100644 --- a/utils/startup_utils.py +++ b/utils/startup_utils.py @@ -1,17 +1,28 @@ import logging import os +from pathlib 
import Path +from typing import TypedDict + from models.database import DatabaseEngine from utils.openai_client import DEFAULT_MODEL, DEFAULT_ENDPOINT logger = logging.getLogger(__name__) -def setup_logging(level: int = logging.INFO) -> None: - """Configura il logger radice con un formato di base.""" +def setup_logging(level: int = logging.INFO, log_file: str | Path | None = None) -> None: + """Configura il logger radice con un formato di base. + + Se viene fornito ``log_file`` i log vengono scritti anche su tale file. + """ + # ``logging.basicConfig`` non accetta un dizionario tipato con ``**`` in modo + # sicuro per mypy. Passiamo quindi gli argomenti esplicitamente in modo da + # evitare problemi di tipizzazione. + filename = str(log_file) if log_file is not None else None logging.basicConfig( level=level, format="%(asctime)s - %(levelname)s - %(message)s", + filename=filename, ) @@ -20,7 +31,17 @@ def initialize_database() -> None: DatabaseEngine.instance().init_db() -def load_default_config() -> dict: +class DefaultConfig(TypedDict): + """Configurazione di default per il client OpenAI.""" + + api_key: str + endpoint: str + model: str + temperature: float + max_tokens: int + + +def load_default_config() -> DefaultConfig: """Restituisce la configurazione API di default.""" return { "api_key": os.environ.get("OPENAI_API_KEY", ""), diff --git a/views/api_configurazione.py b/views/api_configurazione.py index 7dd5c24..f46b8ed 100644 --- a/views/api_configurazione.py +++ b/views/api_configurazione.py @@ -30,7 +30,7 @@ def start_new_preset_edit(): } -def start_existing_preset_edit(preset_id): +def start_existing_preset_edit(preset_id: str) -> None: preset_to_edit = get_preset_by_id(preset_id, st.session_state.api_presets) if not preset_to_edit: st.error("Preset non trovato.") diff --git a/views/component_utils.py b/views/component_utils.py index ba79784..aa942d3 100644 --- a/views/component_utils.py +++ b/views/component_utils.py @@ -1,4 +1,5 @@ import logging +from typing import Any import streamlit as st logger = logging.getLogger(__name__) @@ -70,7 +71,7 @@ def create_card(title: str, content: str, icon: str | None = None, ) -def create_metrics_container(metrics_data: list[dict]): +def create_metrics_container(metrics_data: list[dict[str, Any]]) -> None: """Crea un contenitore con metriche ben stilizzate.""" st.markdown( """ diff --git a/views/set_helpers.py b/views/set_helpers.py index 50d3333..d903236 100644 --- a/views/set_helpers.py +++ b/views/set_helpers.py @@ -1,4 +1,5 @@ import logging +from typing import IO, cast import streamlit as st @@ -11,7 +12,7 @@ def save_set_callback( set_id: str, edited_name: str, - question_options_checkboxes: dict, + question_options_checkboxes: dict[str, bool], newly_selected_questions_ids: list[str], state: SetPageState, ) -> None: @@ -94,8 +95,12 @@ def import_set_callback(state: SetPageState): state.import_set_error_message = "" uploaded_file = st.session_state.get("uploaded_file_content_set") + if uploaded_file is None: + raise ValueError("Nessun file caricato.") try: - result = QuestionSet.import_from_file(uploaded_file) + result = QuestionSet.import_from_file( + cast(IO[str] | IO[bytes], uploaded_file) + ) parts: list[str] = [] if result.sets_imported_count > 0: From a6ac57aa87c61db016dc3d9b27ce812d3eba20c6 Mon Sep 17 00:00:00 2001 From: oniichan Date: Sat, 16 Aug 2025 09:00:39 +0200 Subject: [PATCH 21/41] some changes --- controllers/__init__.py | 6 +++ controllers/api_preset_controller.py | 5 +- controllers/question_controller.py | 13 +++-- 
controllers/question_set_controller.py | 9 +++- controllers/test_controller.py | 48 ++++++++++------- models/database.py | 24 +++++++++ models/question.py | 53 ++++++++---------- models/question_set.py | 54 ++++++++++++++++--- models/test_result.py | 56 ++++++++++--------- tests/conftest.py | 8 +-- tests/test_app.py | 21 +++++--- tests/test_evaluate_answer.py | 7 ++- tests/test_file_writer_utils.py | 22 ++++++++ tests/test_import_results.py | 8 +-- tests/test_importer_export.py | 44 +++++++++++++++ tests/test_models_database.py | 9 ++-- tests/test_openai_client.py | 19 +++++-- tests/test_openai_controllers.py | 12 ++++- tests/test_question_controller.py | 13 ++++- tests/test_question_import.py | 4 +- tests/test_question_set_controller.py | 9 ++++ tests/test_question_set_importer.py | 32 +++++++++++ tests/test_register_page.py | 20 +++++++ tests/test_test_controller.py | 17 ++++-- tests/test_visualizza_risultati_view.py | 3 ++ utils/export_template.py | 34 ++++++++++++ utils/file_writer_utils.py | 51 ++++++++++++++++++ utils/import_template.py | 72 +++++++++++++++++++++++++ utils/openai_client.py | 23 +++++--- views/__init__.py | 19 +++++++ views/api_configurazione.py | 2 + views/esecuzione_test.py | 2 + views/gestione_domande.py | 2 + views/gestione_set.py | 2 + views/home.py | 5 +- views/visualizza_risultati.py | 20 ++++++- 36 files changed, 616 insertions(+), 132 deletions(-) create mode 100644 tests/test_file_writer_utils.py create mode 100644 tests/test_importer_export.py create mode 100644 tests/test_register_page.py create mode 100644 utils/export_template.py create mode 100644 utils/file_writer_utils.py create mode 100644 utils/import_template.py diff --git a/controllers/__init__.py b/controllers/__init__.py index 527141e..1bc3006 100644 --- a/controllers/__init__.py +++ b/controllers/__init__.py @@ -28,6 +28,7 @@ import_questions_action, get_question_text, get_question_category, + export_questions_action, ) # Gestione dei set di domande @@ -38,6 +39,7 @@ update_set, delete_set, prepare_sets_for_view, + export_sets_action, ) # Risultati e utilità di valutazione @@ -45,6 +47,7 @@ load_results, refresh_results, import_results_action, + export_results_action, generate_answer, evaluate_answer, run_test, @@ -89,6 +92,7 @@ "import_questions_action", "get_question_text", "get_question_category", + "export_questions_action", # Set di domande "load_sets", "refresh_question_sets", @@ -96,10 +100,12 @@ "update_set", "delete_set", "prepare_sets_for_view", + "export_sets_action", # Risultati dei test "load_results", "refresh_results", "import_results_action", + "export_results_action", "generate_answer", "evaluate_answer", "calculate_statistics", diff --git a/controllers/api_preset_controller.py b/controllers/api_preset_controller.py index 5aa2f64..9c2be2e 100644 --- a/controllers/api_preset_controller.py +++ b/controllers/api_preset_controller.py @@ -127,8 +127,9 @@ def test_api_connection( ) -> Tuple[bool, str]: """Testa la connessione all'API LLM con i parametri forniti.""" - client = openai_client.get_openai_client(api_key=api_key, base_url=endpoint) - if not client: + try: + client = openai_client.get_openai_client(api_key=api_key, base_url=endpoint) + except openai_client.ClientCreationError: return False, "Client API non inizializzato. Controlla chiave API e endpoint." 
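# --- Editor's sketch (illustrative only, not part of the patch) ---------------
# This hunk adapts test_api_connection to the new contract of
# utils.openai_client.get_openai_client, which now raises ClientCreationError
# instead of returning None (see the utils/openai_client.py hunk later in this
# patch). The caller pattern used across the controllers, in minimal form:
from utils import openai_client

def build_client_or_none(api_key: str, endpoint: str | None):
    # Each call site maps the exception onto its own error channel: a
    # (False, message) tuple here, a ValueError in controllers/test_controller.py.
    try:
        return openai_client.get_openai_client(api_key=api_key, base_url=endpoint)
    except openai_client.ClientCreationError:
        return None
# -------------------------------------------------------------------------------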
try: diff --git a/controllers/question_controller.py b/controllers/question_controller.py index e83f471..761dfad 100644 --- a/controllers/question_controller.py +++ b/controllers/question_controller.py @@ -1,11 +1,11 @@ """Controller per la gestione delle domande senza layer di service.""" import logging -from typing import IO, Optional, Tuple, List, Dict, Any +from typing import IO, Optional, Tuple, List, Dict, Any, Union import pandas as pd -from models.question import Question +from models.question import Question, question_importer from utils.cache import ( get_questions as _get_questions, refresh_questions as _refresh_questions, @@ -106,6 +106,11 @@ def delete_question_action(question_id: str) -> pd.DataFrame: return questions +def export_questions_action(destination: Union[str, IO[str]]) -> None: + """Esporta tutte le domande nella destinazione fornita.""" + question_importer.export_to_file(destination) + + def import_questions_action(uploaded_file: IO[str] | IO[bytes]) -> Dict[str, Any]: """Importa domande da file e restituisce i risultati dell'operazione. @@ -123,8 +128,8 @@ def import_questions_action(uploaded_file: IO[str] | IO[bytes]) -> Dict[str, Any if uploaded_file is None: raise ValueError("Nessun file caricato.") - result = Question.import_from_file(uploaded_file) - if not result["success"]: + result = question_importer.import_from_file(uploaded_file) + if not result.get("success", True): message = "; ".join(result.get("warnings", [])) raise ValueError(message) diff --git a/controllers/question_set_controller.py b/controllers/question_set_controller.py index 01a2797..dd46d39 100644 --- a/controllers/question_set_controller.py +++ b/controllers/question_set_controller.py @@ -1,9 +1,9 @@ import logging -from typing import List, Optional, Any, Dict +from typing import List, Optional, Any, Dict, IO, Union import pandas as pd -from models.question_set import QuestionSet, PersistSetsResult +from models.question_set import QuestionSet, PersistSetsResult, question_set_importer from utils.cache import ( get_questions as _get_questions, get_question_sets as _get_question_sets, @@ -52,6 +52,11 @@ def delete_set(set_id: str) -> pd.DataFrame: return refresh_question_sets() +def export_sets_action(destination: Union[str, IO[str]]) -> None: + """Esporta tutti i set di domande nella destinazione indicata.""" + question_set_importer.export_to_file(destination) + + def prepare_sets_for_view( selected_categories: Optional[List[str]] = None, ) -> Dict[str, Any]: diff --git a/controllers/test_controller.py b/controllers/test_controller.py index 4a99bdf..1dfd3db 100644 --- a/controllers/test_controller.py +++ b/controllers/test_controller.py @@ -5,12 +5,12 @@ import json import logging from datetime import datetime -from typing import Any, Dict, IO, List, Tuple +from typing import Any, Dict, IO, List, Tuple, Union import pandas as pd from openai import APIConnectionError, APIStatusError, RateLimitError -from models.test_result import TestResult +from models.test_result import TestResult, test_result_importer from models.question import Question from utils import openai_client @@ -52,12 +52,14 @@ def import_results_action( if uploaded_file is None: raise ValueError("Nessun file caricato.") - success, message = TestResult.import_from_file(uploaded_file) - if not success: - raise ValueError(message) - + result = test_result_importer.import_from_file(uploaded_file) results = load_results() - return results, message + return results, result["message"] + + +def export_results_action(destination: 
Union[str, IO[str]]) -> None: + """Esporta i risultati dei test nella destinazione fornita.""" + test_result_importer.export_to_file(destination) def generate_answer(question: str, client_config: Dict[str, Any]) -> str: @@ -68,13 +70,16 @@ def generate_answer(question: str, client_config: Dict[str, Any]) -> str: """ api_key = str(client_config.get("api_key", "")) - client = openai_client.get_openai_client( - api_key=api_key, - base_url=client_config.get("endpoint"), - ) - if not client: - logger.error("Client API per la generazione risposte non configurato.") - raise ValueError("Client API non configurato") + try: + client = openai_client.get_openai_client( + api_key=api_key, + base_url=client_config.get("endpoint"), + ) + except openai_client.ClientCreationError as exc: + logger.error( + "Client API per la generazione risposte non configurato: %s", exc + ) + raise ValueError("Client API non configurato") from exc if question is None or not isinstance(question, str) or question.strip() == "": logger.error("La domanda fornita è vuota o non valida.") @@ -119,12 +124,15 @@ def evaluate_answer( """ api_key = str(client_config.get("api_key", "")) - client = openai_client.get_openai_client( - api_key=api_key, - base_url=client_config.get("endpoint"), - ) - if not client: - raise ValueError("Errore: Client API per la valutazione non configurato.") + try: + client = openai_client.get_openai_client( + api_key=api_key, + base_url=client_config.get("endpoint"), + ) + except openai_client.ClientCreationError as exc: + raise ValueError( + "Errore: Client API per la valutazione non configurato." + ) from exc prompt = f""" Sei un valutatore esperto che valuta la qualità delle risposte alle domande. diff --git a/models/database.py b/models/database.py index 84583d7..6332a93 100644 --- a/models/database.py +++ b/models/database.py @@ -17,7 +17,18 @@ class DatabaseEngine: _instance = None _instance_lock = threading.Lock() + def __new__(cls, *args, **kwargs): + if cls._instance is not None: + raise RuntimeError( + "DatabaseEngine è un singleton; usa DatabaseEngine.instance()" + ) + return super().__new__(cls) + def __init__(self) -> None: + if self.__class__._instance is not None: + raise RuntimeError( + "DatabaseEngine è un singleton; usa DatabaseEngine.instance()" + ) self._engine: Optional[Engine] = None self._session_factory: Optional[sessionmaker] = None self._engine_lock = threading.Lock() @@ -31,6 +42,19 @@ def instance(cls) -> "DatabaseEngine": cls._instance = cls() return cls._instance + @classmethod + def reset_instance(cls) -> None: + """Reimposta l'istanza singleton e svuota le risorse in cache.""" + with cls._instance_lock: + if cls._instance is not None: + with cls._instance._engine_lock: + if cls._instance._engine is not None: + cls._instance._engine.dispose() + cls._instance._engine = None + with cls._instance._session_lock: + cls._instance._session_factory = None + cls._instance = None + def _load_config(self) -> Mapping[str, str]: config = configparser.ConfigParser() root = Path(__file__).resolve().parent.parent diff --git a/models/question.py b/models/question.py index 54e33c2..2f04454 100644 --- a/models/question.py +++ b/models/question.py @@ -11,6 +11,8 @@ from models.orm_models import QuestionORM, question_set_questions from utils.data_format_utils import format_questions_for_view from utils.file_reader_utils import read_questions, filter_new_rows +from utils.import_template import ImportTemplate +from utils.export_template import ExportTemplate logger = logging.getLogger(__name__) @@ 
-132,36 +134,6 @@ def _persist_entities(df: pd.DataFrame) -> Tuple[int, List[str]]: return added_count, warnings - @staticmethod - def import_from_file(file: IO[str] | IO[bytes]) -> Dict[str, Any]: - """Importa domande da un file CSV o JSON. - - Parametri - --------- - file: file-like - File contenente le domande da importare. - - Restituisce - ----------- - dict - ``{"success": bool, "imported_count": int, "warnings": list[str]}`` - """ - - try: - df = read_questions(file) - except ValueError as exc: - return {"success": False, "imported_count": 0, "warnings": [str(exc)]} - except Exception as exc: # pragma: no cover - defensive - return { - "success": False, - "imported_count": 0, - "warnings": [f"Errore durante la lettura del file: {exc}"], - } - - imported, warnings = Question._persist_entities(df) - - return {"success": True, "imported_count": imported, "warnings": warnings} - @staticmethod def filter_by_category( category: Optional[str] = None, @@ -174,3 +146,24 @@ def filter_by_category( filtered_df = df[df["categoria"] == category] if category else df return filtered_df, categories + + +class QuestionImporter(ImportTemplate, ExportTemplate): + """Importer per le domande basato su :class:`ImportTemplate` e :class:`ExportTemplate`.""" + + def parse_file(self, file: IO[Any]) -> pd.DataFrame: # type: ignore[override] + """Legge le domande dal file usando ``read_questions``.""" + return read_questions(file) + + def persist_data(self, df: pd.DataFrame) -> Dict[str, Any]: # type: ignore[override] + """Persiste i dati tramite :meth:`Question._persist_entities`.""" + imported, warnings = Question._persist_entities(df) + return {"success": True, "imported_count": imported, "warnings": warnings} + + def gather_data(self) -> pd.DataFrame: # type: ignore[override] + """Recupera tutte le domande dal database.""" + questions = Question.load_all() + return pd.DataFrame([q.__dict__ for q in questions]) + + +question_importer = QuestionImporter() diff --git a/models/question_set.py b/models/question_set.py index 79c5767..c0e34b1 100644 --- a/models/question_set.py +++ b/models/question_set.py @@ -7,6 +7,8 @@ from sqlalchemy import select from utils.file_reader_utils import read_question_sets +from utils.import_template import ImportTemplate +from utils.export_template import ExportTemplate from models.database import DatabaseEngine from models.orm_models import QuestionSetORM, QuestionORM logger = logging.getLogger(__name__) @@ -237,19 +239,57 @@ def _persist_entities( @staticmethod def import_from_file(uploaded_file: IO[str] | IO[bytes]) -> "PersistSetsResult": - """Importa uno o più set di domande da un file JSON o CSV.""" + """Deprecated wrapper for compatibility. + + Usa :class:`QuestionSetImporter` per le nuove importazioni. 
+ """ if uploaded_file is None: raise ValueError("Nessun file fornito per l'importazione.") - data = read_question_sets(uploaded_file) + import warnings + + warnings.warn( + "QuestionSet.import_from_file è deprecato; usa QuestionSetImporter.import_from_file", + DeprecationWarning, + stacklevel=2, + ) + + return question_set_importer.import_from_file(uploaded_file) + + +class QuestionSetImporter(ImportTemplate, ExportTemplate): + """Importer per i set di domande basato su :class:`ImportTemplate` e :class:`ExportTemplate`.""" + + def parse_file(self, file: IO[Any]) -> List[Dict[str, Any]]: # type: ignore[override] + """Legge i set di domande dal file usando ``read_question_sets``.""" + return read_question_sets(file) + + def persist_data(self, parsed: List[Dict[str, Any]]) -> PersistSetsResult: # type: ignore[override] + """Persiste i dati tramite :meth:`QuestionSet._persist_entities`.""" from controllers.question_controller import load_questions - current_questions = load_questions() from controllers.question_set_controller import load_sets + + current_questions = load_questions() current_sets = load_sets() - persist_result = QuestionSet._persist_entities( - data, current_questions, current_sets - ) - return persist_result + return QuestionSet._persist_entities(parsed, current_questions, current_sets) + + def gather_data(self) -> List[Dict[str, Any]]: # type: ignore[override] + """Recupera tutti i set di domande con i dettagli delle domande.""" + from models.question import Question + + sets = QuestionSet.load_all() + questions = { + q.id: {"id": q.id, "domanda": q.domanda, "risposta_attesa": q.risposta_attesa, "categoria": q.categoria} + for q in Question.load_all() + } + data: List[Dict[str, Any]] = [] + for s in sets: + q_list = [questions.get(qid, {"id": qid}) for qid in s.questions] + data.append({"name": s.name, "questions": q_list}) + return data + + +question_set_importer = QuestionSetImporter() diff --git a/models/test_result.py b/models/test_result.py index 13fc5cd..307943b 100644 --- a/models/test_result.py +++ b/models/test_result.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, asdict import uuid -from typing import Any, Dict, List, Tuple, cast +from typing import IO, Any, Dict, List, cast from functools import lru_cache import pandas as pd @@ -11,6 +11,9 @@ from models.database import DatabaseEngine from models.orm_models import TestResultORM from utils.file_reader_utils import read_test_results, filter_new_rows +from utils.import_template import ImportTemplate +from utils.export_template import ExportTemplate + logger = logging.getLogger(__name__) @@ -77,32 +80,8 @@ def _persist_entities(imported_df: pd.DataFrame) -> int: TestResult(**row) for row in combined_df.to_dict(orient="records") ] TestResult.save(results) - TestResult.refresh_cache() return added_count - @staticmethod - def import_from_file(file) -> Tuple[bool, str]: - """Importa risultati di test da ``file``. - - Il file è analizzato tramite :func:`utils.file_reader_utils.read_test_results`. - I risultati esistenti (corrispondenti per ``id``) vengono ignorati. Le nuove - voci vengono salvate e la cache viene aggiornata. - """ - - try: - imported_df = read_test_results(file) - added_count = TestResult._persist_entities(imported_df) - message = ( - f"Importati {added_count} risultati." - if added_count > 0 - else "Nessun nuovo risultato importato." 
- ) - return True, message - except ValueError as e: - return False, str(e) - except Exception as e: # pragma: no cover - return False, f"Errore durante l'importazione dei risultati: {str(e)}" - @staticmethod def save(results: List["TestResult"]) -> None: """Salva un elenco di risultati di test.""" @@ -189,3 +168,30 @@ def calculate_statistics( "radar_metrics": radar_metrics, } + +class TestResultImporter(ImportTemplate, ExportTemplate): + """Importer per i risultati di test basato su :class:`ImportTemplate` e :class:`ExportTemplate`.""" + + def parse_file(self, file: IO[Any]) -> pd.DataFrame: # type: ignore[override] + """Legge i risultati dal file usando ``read_test_results``.""" + return read_test_results(file) + + def persist_data(self, df: pd.DataFrame) -> Dict[str, Any]: # type: ignore[override] + """Persiste i dati tramite :meth:`TestResult._persist_entities`.""" + added_count = TestResult._persist_entities(df) + if added_count > 0: + TestResult.refresh_cache() + message = ( + f"Importati {added_count} risultati." + if added_count > 0 + else "Nessun nuovo risultato importato." + ) + return {"success": True, "imported_count": added_count, "message": message} + + def gather_data(self) -> pd.DataFrame: # type: ignore[override] + """Recupera tutti i risultati dei test dal database.""" + return TestResult.load_all_df() + + +test_result_importer = TestResultImporter() + diff --git a/tests/conftest.py b/tests/conftest.py index 753bd35..fcdbe1a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,14 +12,14 @@ @pytest.fixture() def in_memory_db(): - # Reset singleton to ensure clean state - DatabaseEngine._instance = None # type: ignore[attr-defined] + # Reimposta il singleton per garantire uno stato pulito + DatabaseEngine.reset_instance() db = DatabaseEngine.instance() engine = create_engine("sqlite:///:memory:") Base.metadata.create_all(engine) db._engine = engine # type: ignore[attr-defined] db._session_factory = sessionmaker(bind=engine) # type: ignore[attr-defined] yield db - # Reset after test - DatabaseEngine._instance = None # type: ignore[attr-defined] + # Reimposta dopo il test + DatabaseEngine.reset_instance() diff --git a/tests/test_app.py b/tests/test_app.py index bc3da92..312c9c2 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -5,8 +5,8 @@ def test_app_page_config_and_navigation(monkeypatch): - """Smoke test for Streamlit app configuration and navigation setup.""" - # Record calls to Streamlit API + """Test di base per la configurazione dell'app Streamlit e l'impostazione della navigazione.""" + # Registra le chiamate all'API di Streamlit page_config = {} radio_call = {} @@ -27,13 +27,13 @@ def fake_radio(label, options): monkeypatch.setitem(sys.modules, "streamlit", fake_st) - # Ensure repository root is importable + # Assicura che la radice del repository sia importabile project_root = Path(__file__).resolve().parent.parent sys.path.insert(0, str(project_root)) - # Stub view modules required by app.py + # Crea moduli di vista fittizi richiesti da app.py views_pkg = types.ModuleType("views") - views_pkg.__path__ = [] # mark as package + views_pkg.__path__ = [] # contrassegna come pacchetto view_names = [ "api_configurazione", "esecuzione_test", @@ -48,6 +48,15 @@ def fake_radio(label, options): sys.modules[f"views.{name}"] = mod setattr(views_pkg, name, mod) + views_pkg.page_registry = { + "Home": views_pkg.home.render, + "Configurazione API": views_pkg.api_configurazione.render, + "Gestione Domande": views_pkg.gestione_domande.render, + "Gestione Set di 
Domande": views_pkg.gestione_set.render, + "Esecuzione Test": views_pkg.esecuzione_test.render, + "Visualizzazione Risultati": views_pkg.visualizza_risultati.render, + } + session_state_mod = types.ModuleType("views.session_state") session_state_mod.initialize_session_state = lambda: None sys.modules["views.session_state"] = session_state_mod @@ -58,7 +67,7 @@ def fake_radio(label, options): sys.modules["views"] = views_pkg - # Ensure a fresh import of app + # Assicura un'importazione pulita di app monkeypatch.delitem(sys.modules, "app", raising=False) app = importlib.import_module("app") diff --git a/tests/test_evaluate_answer.py b/tests/test_evaluate_answer.py index 782745a..3adcbfe 100644 --- a/tests/test_evaluate_answer.py +++ b/tests/test_evaluate_answer.py @@ -5,6 +5,8 @@ import pytest +from utils.openai_client import ClientCreationError + sys.path.append(os.path.dirname(os.path.dirname(__file__))) from controllers.test_controller import evaluate_answer # noqa: E402 @@ -50,7 +52,10 @@ def test_evaluate_answer_success(mocker): def test_evaluate_answer_no_client(mocker): - mocker.patch("utils.openai_client.get_openai_client", return_value=None) + mocker.patch( + "utils.openai_client.get_openai_client", + side_effect=ClientCreationError("boom"), + ) with pytest.raises(ValueError): evaluate_answer( "q", "expected", "actual", {"api_key": None} diff --git a/tests/test_file_writer_utils.py b/tests/test_file_writer_utils.py new file mode 100644 index 0000000..db5a08d --- /dev/null +++ b/tests/test_file_writer_utils.py @@ -0,0 +1,22 @@ +import json +import os +import pandas as pd +from utils.file_writer_utils import write_dataset + + +def test_write_dataset_csv(tmp_path): + df = pd.DataFrame([{"a": 1, "b": 2}]) + path = tmp_path / "out.csv" + write_dataset(df, path) + assert path.exists() + loaded = pd.read_csv(path) + assert loaded.iloc[0]["a"] == 1 + + +def test_write_dataset_json(tmp_path): + data = [{"a": 1}, {"a": 2}] + path = tmp_path / "out.json" + write_dataset(data, path) + with open(path, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded[1]["a"] == 2 diff --git a/tests/test_import_results.py b/tests/test_import_results.py index 19a26cd..eeb0b85 100644 --- a/tests/test_import_results.py +++ b/tests/test_import_results.py @@ -6,7 +6,7 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from models.test_result import TestResult +from models.test_result import TestResult, test_result_importer data_dir = os.path.join(os.path.dirname(__file__), "sample_data") @@ -21,10 +21,10 @@ def test_import_from_file_skips_duplicates_and_saves(mocker, filename): [{"id": "1", "set_id": "s1", "timestamp": "t0", "results": {}}] ) with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: - success, message = TestResult.import_from_file(f) + result = test_result_importer.import_from_file(f) - assert success is True - assert message == "Importati 1 risultati." + assert result["success"] is True + assert result["message"] == "Importati 1 risultati." 
mock_save.assert_called_once() saved = mock_save.call_args[0][0] assert {r.id for r in saved} == {"1", "2"} diff --git a/tests/test_importer_export.py b/tests/test_importer_export.py new file mode 100644 index 0000000..2432d01 --- /dev/null +++ b/tests/test_importer_export.py @@ -0,0 +1,44 @@ +import pandas as pd +from models.question import Question, question_importer +from models.question_set import QuestionSet, question_set_importer +from models.test_result import TestResult, test_result_importer + + +def test_question_gather_data(mocker): + mocker.patch( + "models.question.Question.load_all", + return_value=[Question(id="1", domanda="d", risposta_attesa="a", categoria="c")], + ) + df = question_importer.gather_data() + assert df.to_dict(orient="records") == [ + {"id": "1", "domanda": "d", "risposta_attesa": "a", "categoria": "c"} + ] + + +def test_question_set_gather_data(mocker): + mocker.patch( + "models.question.Question.load_all", + return_value=[Question(id="1", domanda="d", risposta_attesa="a", categoria="c")], + ) + mocker.patch( + "models.question_set.QuestionSet.load_all", + return_value=[QuestionSet(id="s1", name="S1", questions=["1"])] + ) + data = question_set_importer.gather_data() + assert data == [ + {"name": "S1", "questions": [ + {"id": "1", "domanda": "d", "risposta_attesa": "a", "categoria": "c"} + ]} + ] + + +def test_test_result_gather_data(mocker): + df = pd.DataFrame([ + {"id": "1", "set_id": "s", "timestamp": "t", "results": {}} + ]) + mocker.patch( + "models.test_result.TestResult.load_all_df", + return_value=df, + ) + result = test_result_importer.gather_data() + assert result.equals(df) diff --git a/tests/test_models_database.py b/tests/test_models_database.py index f80606d..cabfed8 100644 --- a/tests/test_models_database.py +++ b/tests/test_models_database.py @@ -6,9 +6,8 @@ def test_get_engine_uses_config_and_create_engine(monkeypatch): - DatabaseEngine._instance = None # ensure fresh singleton + DatabaseEngine.reset_instance() # assicura un singleton pulito db = DatabaseEngine.instance() - db._engine = None # type: ignore[attr-defined] fake_cfg = {'user': 'u', 'password': 'p', 'host': 'h', 'database': 'db'} monkeypatch.setattr(DatabaseEngine, '_load_config', lambda self: fake_cfg) called = {} @@ -16,7 +15,7 @@ def test_get_engine_uses_config_and_create_engine(monkeypatch): def fake_ensure(self, cfg): called['ensure'] = cfg monkeypatch.setattr(DatabaseEngine, '_ensure_database', fake_ensure) - fake_engine = SimpleNamespace() + fake_engine = SimpleNamespace(dispose=lambda: None) def fake_create_engine(url, pool_pre_ping=True, pool_recycle=3600): called['url'] = url @@ -27,12 +26,12 @@ def fake_create_engine(url, pool_pre_ping=True, pool_recycle=3600): assert engine is fake_engine assert called['ensure'] == fake_cfg assert 'mysql+pymysql://u:p@h:3306/db' in called['url'] - # second call should reuse same engine + # la seconda chiamata dovrebbe riutilizzare lo stesso engine assert db.get_engine() is fake_engine def test_ensure_database_error(monkeypatch): - DatabaseEngine._instance = None + DatabaseEngine.reset_instance() db = DatabaseEngine.instance() class DummyEngine: diff --git a/tests/test_openai_client.py b/tests/test_openai_client.py index 2d5675d..957f524 100644 --- a/tests/test_openai_client.py +++ b/tests/test_openai_client.py @@ -3,10 +3,13 @@ import sys from types import SimpleNamespace +import pytest + sys.path.append(os.path.dirname(os.path.dirname(__file__))) from utils.openai_client import ( # noqa: E402 DEFAULT_MODEL, + ClientCreationError, 
get_available_models_for_endpoint, get_openai_client, ) @@ -14,11 +17,18 @@ def test_get_openai_client_no_api_key(caplog): caplog.set_level(logging.WARNING) - client = get_openai_client("") - assert client is None + with pytest.raises(ClientCreationError): + get_openai_client("") assert "Tentativo di creare client OpenAI senza chiave API." in caplog.text +def test_get_openai_client_creation_failure(mocker): + mock_openai = mocker.patch("utils.openai_client.OpenAI", side_effect=RuntimeError("boom")) + with pytest.raises(ClientCreationError): + get_openai_client("key") + mock_openai.assert_called_once() + + def test_get_openai_client_uses_custom_base_url(mocker): mock_openai = mocker.patch("utils.openai_client.OpenAI") mock_client = mocker.MagicMock() @@ -31,7 +41,10 @@ def test_get_openai_client_uses_custom_base_url(mocker): def test_get_available_models_returns_error_when_no_client(mocker): - mocker.patch("utils.openai_client.get_openai_client", return_value=None) + mocker.patch( + "utils.openai_client.get_openai_client", + side_effect=ClientCreationError("boom"), + ) models = get_available_models_for_endpoint( "Personalizzato", endpoint_url="http://endpoint", api_key="key" ) diff --git a/tests/test_openai_controllers.py b/tests/test_openai_controllers.py index e31d187..8896174 100644 --- a/tests/test_openai_controllers.py +++ b/tests/test_openai_controllers.py @@ -3,6 +3,8 @@ import pytest +from utils.openai_client import ClientCreationError + # Aggiunge la cartella principale al percorso dei moduli per i test sys.path.append(os.path.dirname(os.path.dirname(__file__))) @@ -34,7 +36,10 @@ def test_generate_answer_success(mocker): def test_generate_answer_no_client(mocker): - mocker.patch("utils.openai_client.get_openai_client", return_value=None) + mocker.patch( + "utils.openai_client.get_openai_client", + side_effect=ClientCreationError("boom"), + ) with pytest.raises(ValueError): generate_answer("question", {"api_key": None}) @@ -80,7 +85,10 @@ def test_test_api_connection_unexpected_response(mocker): def test_test_api_connection_no_client(mocker): - mocker.patch("utils.openai_client.get_openai_client", return_value=None) + mocker.patch( + "utils.openai_client.get_openai_client", + side_effect=ClientCreationError("boom"), + ) ok, msg = api_preset_controller.test_api_connection( "key", "endpoint", "model", 0.1, 10 ) diff --git a/tests/test_question_controller.py b/tests/test_question_controller.py index f318dad..a1fe66f 100644 --- a/tests/test_question_controller.py +++ b/tests/test_question_controller.py @@ -141,6 +141,15 @@ def test_filter_by_category_empty_df(mocker): assert categories == [] +def test_export_questions_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.question_controller.question_importer.export_to_file" + ) + dest = tmp_path / "qs.csv" + question_controller.export_questions_action(dest) + mock_export.assert_called_once_with(dest) + + def test_get_question_text_found(mocker): mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") mock_load = mocker.patch("controllers.question_controller.load_questions") @@ -223,7 +232,7 @@ def test_delete_question_action(mocker): def test_import_questions_action_success(mocker): mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") mock_import = mocker.patch( - "controllers.question_controller.Question.import_from_file" + "controllers.question_controller.question_importer.import_from_file" ) mock_import.return_value = { "success": True, @@ -251,7 +260,7 @@ def 
test_import_questions_action_no_file(): def test_import_questions_action_failure(mocker): mock_refresh = mocker.patch("controllers.question_controller.refresh_questions") mock_import = mocker.patch( - "controllers.question_controller.Question.import_from_file" + "controllers.question_controller.question_importer.import_from_file" ) mock_import.return_value = { "success": False, diff --git a/tests/test_question_import.py b/tests/test_question_import.py index 9b90df9..50b4077 100644 --- a/tests/test_question_import.py +++ b/tests/test_question_import.py @@ -3,7 +3,7 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from models.question import Question +from models.question import question_importer class DummySession: @@ -49,7 +49,7 @@ def test_import_from_file_skips_duplicates_and_adds_new(mocker): for filename in ["questions.csv", "questions.json"]: engine.session.inserted.clear() with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f: - result = Question.import_from_file(f) + result = question_importer.import_from_file(f) assert result["success"] is True assert result["imported_count"] == 1 assert any("q1" in w for w in result["warnings"]) diff --git a/tests/test_question_set_controller.py b/tests/test_question_set_controller.py index e38a9ba..b8ec97e 100644 --- a/tests/test_question_set_controller.py +++ b/tests/test_question_set_controller.py @@ -82,3 +82,12 @@ def test_prepare_sets_for_view(mocker): assert result["sets_df"].iloc[0]["questions_detail"] == [ {"id": "1", "domanda": "d1", "categoria": "A"} ] + + +def test_export_sets_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.question_set_controller.question_set_importer.export_to_file" + ) + dest = tmp_path / "sets.csv" + question_set_controller.export_sets_action(dest) + mock_export.assert_called_once_with(dest) diff --git a/tests/test_question_set_importer.py b/tests/test_question_set_importer.py index 3eefb94..fd24e6b 100644 --- a/tests/test_question_set_importer.py +++ b/tests/test_question_set_importer.py @@ -11,6 +11,7 @@ from models.question_set import ( QuestionSet, PersistSetsResult, + question_set_importer, ) from utils.file_reader_utils import read_question_sets @@ -99,3 +100,34 @@ def test_persist_sets_skips_duplicates(mocker): assert result.existing_questions_found_count == 0 assert any("esiste già" in w for w in result.warnings) mock_create.assert_called_once_with("New", []) + + +def test_question_set_importer_invokes_helpers(mocker): + mock_reader = mocker.patch( + "models.question_set.read_question_sets", + return_value=[{"name": "Sample", "questions": []}], + ) + mock_persist = mocker.patch( + "models.question_set.QuestionSet._persist_entities", + return_value="ok", + ) + current_questions = pd.DataFrame() + current_sets = pd.DataFrame() + mocker.patch( + "controllers.question_controller.load_questions", + return_value=current_questions, + ) + mocker.patch( + "controllers.question_set_controller.load_sets", + return_value=current_sets, + ) + + file = io.StringIO("[]") + file.name = "data.json" + result = question_set_importer.import_from_file(file) + + mock_reader.assert_called_once_with(file) + mock_persist.assert_called_once_with( + [{"name": "Sample", "questions": []}], current_questions, current_sets + ) + assert result == "ok" diff --git a/tests/test_register_page.py b/tests/test_register_page.py new file mode 100644 index 0000000..071fb14 --- /dev/null +++ b/tests/test_register_page.py @@ -0,0 +1,20 @@ +import pytest + +from views import register_page, 
page_registry + + +def test_register_page_prevents_duplicates(): + page_registry.clear() + + @register_page("Example") + def first(): + return "first" + + assert page_registry["Example"] is first + + with pytest.raises(ValueError): + @register_page("Example") + def second(): # pragma: no cover - funzione non registrata + return "second" + + assert page_registry["Example"] is first diff --git a/tests/test_test_controller.py b/tests/test_test_controller.py index 16e4a2b..95fae57 100644 --- a/tests/test_test_controller.py +++ b/tests/test_test_controller.py @@ -12,7 +12,7 @@ def test_import_results_action_no_file(mocker): mock_import = mocker.patch( - "controllers.test_controller.TestResult.import_from_file" + "controllers.test_controller.test_result_importer.import_from_file" ) mock_load_results = mocker.patch("controllers.test_controller.load_results") with pytest.raises(ValueError, match="Nessun file caricato"): @@ -23,10 +23,10 @@ def test_import_results_action_no_file(mocker): def test_import_results_action_failure(mocker): mock_import = mocker.patch( - "controllers.test_controller.TestResult.import_from_file" + "controllers.test_controller.test_result_importer.import_from_file" ) mock_load_results = mocker.patch("controllers.test_controller.load_results") - mock_import.return_value = (False, "errore") + mock_import.side_effect = ValueError("errore") with pytest.raises(ValueError, match="errore"): import_results_action("dummy") mock_load_results.assert_not_called() @@ -91,3 +91,14 @@ def test_run_test_generation_and_evaluation_errors(mocker): assert q2["actual_answer"] == "ans2" assert q2["evaluation"]["score"] == 0 assert isinstance(res["results_df"], pd.DataFrame) + + +def test_export_results_action(mocker, tmp_path): + mock_export = mocker.patch( + "controllers.test_controller.test_result_importer.export_to_file" + ) + dest = tmp_path / "results.json" + from controllers.test_controller import export_results_action + + export_results_action(dest) + mock_export.assert_called_once_with(dest) diff --git a/tests/test_visualizza_risultati_view.py b/tests/test_visualizza_risultati_view.py index 138daa1..68b21af 100644 --- a/tests/test_visualizza_risultati_view.py +++ b/tests/test_visualizza_risultati_view.py @@ -49,6 +49,9 @@ def markdown(self, *args, **kwargs): def selectbox(self, label, options, index=0, **kwargs): return options[index] + def text_input(self, label, value="", **kwargs): + return value + def button(self, label, on_click=None, **kwargs): if on_click: self.captured_callbacks[label] = on_click diff --git a/utils/export_template.py b/utils/export_template.py new file mode 100644 index 0000000..a887314 --- /dev/null +++ b/utils/export_template.py @@ -0,0 +1,34 @@ +"""Template base per l'esportazione di dati su file.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, IO, Union, final + + +class ExportTemplate(ABC): + """Classe base astratta per implementare l'esportazione su file.""" + + @abstractmethod + def gather_data(self) -> Any: + """Raccoglie i dati correnti da esportare.""" + pass + + @final + def export_to_file(self, destination: Union[str, IO[Any]]) -> None: + """Esporta i dati raccolti su ``destination``. + + Nota: le sottoclassi non devono sovrascrivere questo metodo. + + Parameters + ---------- + destination: Union[str, IO[Any]] + Percorso del file di destinazione oppure file aperto in scrittura. 
+ """ + from utils.file_writer_utils import write_dataset + + data = self.gather_data() + write_dataset(data, destination) + + +__all__ = ["ExportTemplate"] diff --git a/utils/file_writer_utils.py b/utils/file_writer_utils.py new file mode 100644 index 0000000..cb8606b --- /dev/null +++ b/utils/file_writer_utils.py @@ -0,0 +1,51 @@ +"""Utility per la serializzazione di dataset in CSV o JSON.""" + +from __future__ import annotations + +import json +import os +from typing import Any, IO, Union + +import pandas as pd + +__all__ = ["write_dataset"] + + +def _ensure_dataframe(data: Any) -> pd.DataFrame: + """Converte ``data`` in ``DataFrame`` se possibile.""" + if isinstance(data, pd.DataFrame): + return data + return pd.DataFrame(data) + + +def write_dataset(data: Any, destination: Union[str, IO[str]]) -> None: + """Scrive ``data`` su ``destination`` in formato CSV o JSON. + + Il formato viene determinato dall'estensione del file. + ``destination`` può essere un percorso o un file aperto in scrittura. + """ + close_after = False + if isinstance(destination, (str, os.PathLike)): + file_path = os.fspath(destination) + ext = os.path.splitext(file_path)[1].lower() + f: IO[str] = open(file_path, "w", encoding="utf-8", newline="") + close_after = True + else: + f = destination + name = getattr(f, "name", "") + ext = os.path.splitext(name)[1].lower() + + if ext == ".csv": + df = _ensure_dataframe(data) + df.to_csv(f, index=False) + elif ext == ".json": + if isinstance(data, pd.DataFrame): + payload = data.to_dict(orient="records") + else: + payload = data + json.dump(payload, f, ensure_ascii=False, indent=2) + else: + raise ValueError("Formato file non supportato. Usare estensione .csv o .json") + + if close_after: + f.close() diff --git a/utils/import_template.py b/utils/import_template.py new file mode 100644 index 0000000..8c7054a --- /dev/null +++ b/utils/import_template.py @@ -0,0 +1,72 @@ +"""Template base per l'importazione di dati da file.""" + +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Any, IO, final + +logger = logging.getLogger(__name__) + + +class ImportTemplate(ABC): + """Classe base astratta per implementare l'importazione da file. + + Le sottoclassi devono implementare ``parse_file`` e ``persist_data`` mentre + questo template gestisce il flusso dell'operazione e l'handling degli errori. + """ + + @final + def import_from_file(self, file: IO[Any]) -> Any: + """Esegue l'importazione da un file. + + Questo metodo fornisce il flusso standard dell'importazione e **non deve + essere sovrascritto** dalle sottoclassi. + + Il flusso standard prevede il parsing tramite :meth:`parse_file` seguito + dalla persistenza dei dati con :meth:`persist_data`. + + Parameters + ---------- + file: IO[Any] + File aperto da cui leggere i dati. Non viene chiuso. + + Returns + ------- + Any + Il valore restituito da :meth:`persist_data`. + + Raises + ------ + ValueError + Se si verifica un errore durante l'importazione. L'eccezione + originale viene loggata e incapsulata in un ``ValueError``. 
+ """ + try: + logger.debug( + "Avvio importazione dal file %s", getattr(file, "name", "") + ) + parsed = self.parse_file(file) + logger.debug("Parsing completato: %s", parsed) + result = self.persist_data(parsed) + logger.info( + "Importazione completata con successo dal file %s", + getattr(file, "name", ""), + ) + return result + except Exception as exc: # noqa: BLE001 + logger.exception("Errore durante l'importazione: %s", exc) + raise ValueError("Errore durante l'importazione") from exc + + @abstractmethod + def parse_file(self, file: IO[Any]) -> Any: + """Legge e interpreta il contenuto di ``file``.""" + pass + + @abstractmethod + def persist_data(self, parsed: Any) -> Any: + """Persiste i dati parsati nel database o in altra destinazione.""" + pass + + +__all__ = ["ImportTemplate"] diff --git a/utils/openai_client.py b/utils/openai_client.py index 7b7a369..074b5a2 100644 --- a/utils/openai_client.py +++ b/utils/openai_client.py @@ -11,12 +11,19 @@ DEFAULT_ENDPOINT: str = "https://api.openai.com/v1" -def get_openai_client(api_key: str, base_url: str | None = None) -> OpenAI | None: - """Crea e restituisce un client OpenAI configurato.""" +class ClientCreationError(Exception): + """Eccezione sollevata quando la creazione del client OpenAI fallisce.""" + + +def get_openai_client(api_key: str, base_url: str | None = None) -> OpenAI: + """Crea e restituisce un client OpenAI configurato. + + Solleva ``ClientCreationError`` se la chiave API è mancante o la creazione fallisce. + """ if not api_key: logger.warning("Tentativo di creare client OpenAI senza chiave API.") - return None + raise ClientCreationError("Chiave API mancante") try: effective_base_url = ( base_url @@ -24,9 +31,9 @@ def get_openai_client(api_key: str, base_url: str | None = None) -> OpenAI | Non else DEFAULT_ENDPOINT ) return OpenAI(api_key=api_key, base_url=effective_base_url) - except Exception as exc: + except Exception as exc: # noqa: BLE001 logger.error(f"Errore durante la creazione del client OpenAI: {exc}") - return None + raise ClientCreationError(str(exc)) from exc def get_available_models_for_endpoint( @@ -52,8 +59,9 @@ def get_available_models_for_endpoint( "gpt-3.5-turbo", ] - client = get_openai_client(api_key=api_key, base_url=endpoint_url) - if not client: + try: + client = get_openai_client(api_key=api_key, base_url=endpoint_url) + except ClientCreationError: return ["(Errore creazione client API)", DEFAULT_MODEL] try: models_response: Any = client.models.list() @@ -93,6 +101,7 @@ def get_available_models_for_endpoint( __all__ = [ "DEFAULT_MODEL", "DEFAULT_ENDPOINT", + "ClientCreationError", "get_openai_client", "get_available_models_for_endpoint", ] diff --git a/views/__init__.py b/views/__init__.py index 87130c9..cb47f14 100644 --- a/views/__init__.py +++ b/views/__init__.py @@ -1,4 +1,23 @@ """Pacchetto delle viste.""" import logging +from typing import Callable, Dict + logger = logging.getLogger(__name__) + + +page_registry: Dict[str, Callable] = {} + + +def register_page(name: str): + """Decoratore per registrare la funzione di rendering di una pagina.""" + + def decorator(func: Callable) -> Callable: + if name in page_registry: + messaggio = f"La pagina '{name}' è già registrata" + logger.warning(messaggio) + raise ValueError(messaggio) + page_registry[name] = func + return func + + return decorator diff --git a/views/api_configurazione.py b/views/api_configurazione.py index f46b8ed..30cfe72 100644 --- a/views/api_configurazione.py +++ b/views/api_configurazione.py @@ -1,6 +1,7 @@ import logging 
import streamlit as st +from views import register_page from views.style_utils import add_page_header, add_section_title from controllers import ( save_preset, @@ -115,6 +116,7 @@ def delete_preset_callback(preset_id): st.error(message) +@register_page("Configurazione API") def render(): add_page_header( "Gestione Preset API", diff --git a/views/esecuzione_test.py b/views/esecuzione_test.py index d59924a..44b9130 100644 --- a/views/esecuzione_test.py +++ b/views/esecuzione_test.py @@ -3,6 +3,7 @@ import streamlit as st from controllers import run_test, load_sets, load_presets, get_preset_by_name +from views import register_page from views.style_utils import add_page_header, add_section_title logger = logging.getLogger(__name__) @@ -21,6 +22,7 @@ def run_llm_test_callback(): st.session_state.run_llm_test = True +@register_page("Esecuzione Test") def render(): # === Inizializzazione delle variabili di stato === if 'test_mode' not in st.session_state: diff --git a/views/gestione_domande.py b/views/gestione_domande.py index 3eeb426..8ab7ef9 100644 --- a/views/gestione_domande.py +++ b/views/gestione_domande.py @@ -11,6 +11,7 @@ delete_question_action, import_questions_action, ) +from views import register_page from views.style_utils import add_page_header from views.state_models import QuestionPageState logger = logging.getLogger(__name__) @@ -105,6 +106,7 @@ def confirm_delete_question_dialog(question_id, question_text): st.rerun() +@register_page("Gestione Domande") def render(): # === Inizializzazione dello stato === st.session_state.setdefault("question_page_state", QuestionPageState()) diff --git a/views/gestione_set.py b/views/gestione_set.py index ebd6b69..3879006 100644 --- a/views/gestione_set.py +++ b/views/gestione_set.py @@ -5,6 +5,7 @@ load_sets, prepare_sets_for_view, ) +from views import register_page from views.style_utils import add_page_header, add_global_styles from views.state_models import SetPageState from views.set_helpers import ( @@ -17,6 +18,7 @@ logger = logging.getLogger(__name__) +@register_page("Gestione Set di Domande") def render(): add_global_styles() diff --git a/views/home.py b/views/home.py index fd7d96e..cd50295 100644 --- a/views/home.py +++ b/views/home.py @@ -1,12 +1,15 @@ -"""Home page view module for the Streamlit application.""" +"""Modulo della vista per la pagina Home dell'applicazione Streamlit.""" import logging import streamlit as st from .style_utils import add_home_styles +from views import register_page + logger = logging.getLogger(__name__) +@register_page("Home") def render(): """Visualizza la pagina principale con le funzionalità della piattaforma.""" diff --git a/views/visualizza_risultati.py b/views/visualizza_risultati.py index 344b8fe..15ba664 100644 --- a/views/visualizza_risultati.py +++ b/views/visualizza_risultati.py @@ -15,10 +15,12 @@ list_model_names, prepare_select_options, ) +from views import register_page from views.style_utils import add_page_header, add_section_title logger = logging.getLogger(__name__) +@register_page("Visualizzazione Risultati") def render(): add_page_header( "Visualizzazione Risultati Test", @@ -144,18 +146,32 @@ def import_results_callback(): 'timestamp': selected_result_row['timestamp'], 'results': result_data }, indent=2) + selected_filename = st.text_input( + "Nome file per export risultato selezionato", + value=f"result_{selected_result_row['id']}.json", + key="selected_result_filename", + ) + if selected_filename and not selected_filename.endswith(".json"): + selected_filename += ".json" 
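The `page_registry` added to `views/__init__.py`, together with the `@register_page` decorations applied to every view above, makes it possible to build the navigation menu dynamically. The wiring below is a hypothetical sketch, not part of this patch series (app.py keeps its explicit `PAGES` dict here); it assumes the view modules are imported purely for their `@register_page` side effects.

```python
# Hypothetical wiring: derive the sidebar menu from the registry.
import streamlit as st

from views import page_registry
from views import (  # noqa: F401  (imported for their @register_page side effects)
    api_configurazione,
    esecuzione_test,
    gestione_domande,
    gestione_set,
    home,
    visualizza_risultati,
)

# Each registered name maps to the render() function of its view module.
selection = st.sidebar.radio("Navigazione", list(page_registry.keys()))
page_registry[selection]()
```

Because `register_page` raises `ValueError` on duplicate names (as the test in `tests/test_views_registry.py` checks), a menu built this way cannot silently overwrite a page.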
         st.download_button(
             "Export Risultato Selezionato",
             selected_json,
-            file_name=f"result_{selected_result_row['id']}.json",
+            file_name=selected_filename,
             mime="application/json"
         )
+        all_filename = st.text_input(
+            "Nome file per export tutti i risultati",
+            value="all_results.json",
+            key="all_results_filename",
+        )
+        if all_filename and not all_filename.endswith(".json"):
+            all_filename += ".json"
         all_json = json.dumps(st.session_state.results.to_dict(orient="records"), indent=2)
         st.download_button(
             "Export Tutti i Risultati",
             all_json,
-            file_name="all_results.json",
+            file_name=all_filename,
             mime="application/json"
         )

From 5941dd87f981691d2b056f47c945b2442cacd6c1 Mon Sep 17 00:00:00 2001
From: sdp01
Date: Sun, 17 Aug 2025 18:01:16 +0200
Subject: [PATCH 22/41] Spostato tutto il CSS su file apposito

---
 app.py               |   4 +-
 views/style_utils.py | 347 ++-----------------------------------------
 views/styles.css     | 319 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 337 insertions(+), 333 deletions(-)
 create mode 100644 views/styles.css

diff --git a/app.py b/app.py
index a9cfade..c090f95 100644
--- a/app.py
+++ b/app.py
@@ -11,7 +11,7 @@
     visualizza_risultati,
 )
 from views.session_state import initialize_session_state
-from views.style_utils import add_global_styles
+from views.style_utils import load_css
 from utils.startup_utils import setup_logging

 logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@
 st.title("🧠 LLM Test Evaluation Platform - Artificial QI")

 # Aggiungi CSS personalizzato e stili globali
-add_global_styles()
+load_css()

 PAGES = {
     "Home": home.render,
diff --git a/views/style_utils.py b/views/style_utils.py
index bd7deeb..dab2af9 100644
--- a/views/style_utils.py
+++ b/views/style_utils.py
@@ -4,191 +4,30 @@
 """
 import logging
-
 import streamlit as st
+from pathlib import Path
+
 logger = logging.getLogger(__name__)
+def load_css():
+    """
+    Applica il CSS globale presente in 'styles.css'.
+    """
+    css_path = Path(__file__).parent.parent / "views" / "styles.css"
+    if css_path.exists():
+        css_content = css_path.read_text()
+        st.markdown(f"<style>{css_content}</style>", unsafe_allow_html=True)
+    else:
+        st.warning("File styles.css non trovato. Assicurati che sia presente nella cartella views.")
+
 def add_global_styles():
     """Aggiunge stili globali all'applicazione."""
-    st.markdown(
-        """
-
-        """,
-        unsafe_allow_html=True,
-    )
-
+    load_css()

 def add_page_header(title: str, icon: str = "💡", description: str | None = None):
     """Aggiunge un'intestazione di pagina stilizzata."""
-    add_global_styles()
-    st.markdown(
-        """
-
-        """,
-        unsafe_allow_html=True,
-    )
-
+    load_css()
     st.markdown(
         f"""
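The new `load_css` helper works by injecting the stylesheet through `st.markdown` with `unsafe_allow_html=True`. Below is a compact sketch of the same pattern in isolation; the caching decorator is an optional refinement that assumes a Streamlit version providing `st.cache_data`, and it is not something this commit introduces.

```python
# Sketch of the CSS-injection pattern used by load_css(); names are illustrative.
from pathlib import Path

import streamlit as st


@st.cache_data
def _read_css(path: str) -> str:
    # Cache the file read so the stylesheet is not re-read on every rerun.
    return Path(path).read_text(encoding="utf-8")


def inject_css(path: str) -> None:
    css = _read_css(path)
    # The <style> wrapper is what makes the injected text take effect as CSS.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
```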