Binary file added .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion .gitignore
@@ -165,4 +165,4 @@ nodedatabase.db
data/*
upload_all.py
eval/*
logs/*
logs/*.DS_Store
42 changes: 30 additions & 12 deletions README.md
@@ -1,21 +1,39 @@
# MMIF Graph Visualizer

This repository hosts the code for the Graph Visualizer, a collection-level visualizer for [MMIF](https://mmif.clams.ai/) files which renders MMIF files as nodes in a D3 force-directed graph.
This repository uses the Gemma3 model, run locally through Ollama, to summarize transcripts in [MMIF](https://mmif.clams.ai/) files.


![screenshot](https://github.com/haydenmccormick/graph-visualizer/assets/74222796/a32f5379-e463-4af9-8dc9-d78206f79aa2)

## Quick Start

Currently, you can run the server in two ways:
1. Manually, with Python:
* Install requirements: `pip install -r requirements.txt`
* Unzip `data/topic_newshour.zip` in the `data` directory
* Run `python app.py` to start the server. It will be accessible at `localhost:5555`
* Run the mmif visualizer in parallel for access to visualization. **The MMIF visualizer should be exposed to port 5000**
1. Prerequisites:
   Before running the script, ensure you have the following installed:
   * Python 3.8+ (a virtual environment is recommended)
   * Ollama – for running the Gemma3 model locally
   * Torch – for text processing (if needed by any preprocessing logic)
   * MMIF-Python – for working with MMIF files

2. Installation:
   * Clone the repository and install dependencies:
     ```bash
     git clone https://github.com/your-username/clams-transcript-summarizer.git
     cd clams-transcript-summarizer
     pip install -r requirements.txt
     ```
   * Make sure the Ollama app is running and the Gemma3 model is available:
     ```bash
     ollama run gemma3
     ```
3. Usage:
   Run the summarizer with an MMIF file that includes ASR transcript data (a rough, hypothetical sketch of what this step does under the hood appears at the end of this section):
   ```bash
   python3 summarize.py /path/to/your/transcript_file.json
   ```
2. Using Docker/Podman
* `docker-compose up` will spin up the Graph Visualizer and the MMIF visualizer and connect them via a network.
* **WARNING**: Because the project has substantial modeling requirements and networking setup, building the container can take a long time and, on my hardware, has consistently crashed before completing. I have not been able to debug this; running the files locally with your own Python distribution is likely the most efficient and accessible way to start the service.
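
For context, here is a minimal, hypothetical sketch of what the summarization step could look like under the hood, assuming the transcript is stored in the MMIF file's text documents and that a local Ollama server is serving `gemma3` on its default port. The actual `summarize.py` may extract the transcript from MMIF views differently; the endpoint and response fields below follow Ollama's public REST API.

```python
# Hypothetical sketch (not the project's actual summarize.py): read an MMIF
# file, collect its text-document contents, and ask a local Ollama server
# running gemma3 for a summary.
import json
import sys

import requests

OLLAMA_URL = "http://localhost:11434/api/generate"  # Ollama's default endpoint


def summarize_transcript(mmif_path: str, model: str = "gemma3") -> str:
    with open(mmif_path, "r", encoding="utf-8") as f:
        mmif = json.load(f)

    # Simplified extraction: concatenate every top-level text document.
    # An ASR pipeline may instead store the transcript in a view's annotations.
    transcript = " ".join(
        doc.get("properties", {}).get("text", {}).get("@value", "")
        for doc in mmif.get("documents", [])
        if "TextDocument" in doc.get("@type", "")
    )

    # Non-streaming generation request against the local Ollama server.
    response = requests.post(
        OLLAMA_URL,
        json={
            "model": model,
            "prompt": f"Summarize the following transcript:\n\n{transcript}",
            "stream": False,
        },
        timeout=600,
    )
    response.raise_for_status()
    return response.json()["response"]


if __name__ == "__main__":
    print(summarize_transcript(sys.argv[1]))
```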

## Directory Structure

@@ -50,8 +68,8 @@ This project is heavily centered around client-side Javascript code, with Python
- date.py [Date scraping]
- get_descriptions.py [Description scraping from AAPB API]
- ner.py [Spacy named entity extraction]
- summarize.py [Abstractive summarization using BART]
- topic_model.py [Topic modelling using BERTopic]
- summarize.py [Abstractive summarization using Gemma3]
- topic_model.py [Topic modelling using Gemma3]
- preprocessing/preprocess.py [functions for building description dataset]
- templates
- index.html
324 changes: 324 additions & 0 deletions modeling/comparison.py
@@ -0,0 +1,324 @@
"""Simplified Transcript Summarizer Comparison Tool

This script creates a focused visual comparison between two summarizer systems.

Usage:
comparison.py --summaries1 <path_to_system1_summaries>
--summaries2 <path_to_system2_summaries>
[--output_dir <output_directory>]
"""

import os
import argparse
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
from rouge_score import rouge_scorer
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

# Ensure NLTK tokenizer data is available (newer NLTK releases need punkt_tab
# in addition to punkt for word_tokenize/sent_tokenize)
for _resource in ('punkt', 'punkt_tab'):
    try:
        nltk.data.find(f'tokenizers/{_resource}')
    except LookupError:
        nltk.download(_resource)

def load_summaries(dir_path):
"""Load summaries from a directory with robust error handling"""
summaries = {}
try:
files = list(Path(dir_path).glob("*.txt"))
if not files:
print(f"Warning: No .txt files found in {dir_path}")
return summaries

for file_path in tqdm(files, desc=f"Loading from {dir_path}"):
file_id = file_path.stem
try:
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read().strip()
# Extract just the summary if it follows a pattern like "Summary: [content]"
summary_match = re.search(r"Summary:\s*(.*?)(?:\n\n|\Z)", text, re.DOTALL)
if summary_match:
summaries[file_id] = summary_match.group(1).strip()
else:
summaries[file_id] = text
except Exception as e:
print(f"Error reading file {file_path}: {e}")
except Exception as e:
print(f"Error accessing directory {dir_path}: {e}")

return summaries

def create_comparison_radar_chart(system1_summaries, system2_summaries, output_path):
"""Create a single radar chart comparing the two systems"""
# Initialize metrics
rouge_scorer_obj = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Find common file IDs
common_ids = set(system1_summaries.keys()) & set(system2_summaries.keys())
print(f"Found {len(common_ids)} common transcripts between both systems.")

if len(common_ids) == 0:
print("Error: No common files found between the two systems.")
return None

# Calculate metrics for each file
metrics = {
"word_count": {"System 1": [], "System 2": []},
"sentence_count": {"System 1": [], "System 2": []},
"vocabulary_richness": {"System 1": [], "System 2": []},
"rouge1_f": [],
"rouge2_f": [],
"rougeL_f": [],
"semantic_similarity": []
}

for file_id in tqdm(common_ids, desc="Calculating metrics"):
summary1 = system1_summaries[file_id]
summary2 = system2_summaries[file_id]

# Skip empty summaries
if not summary1 or not summary2:
print(f"Warning: Empty summary found for {file_id}, skipping.")
continue

# Calculate basic metrics
metrics["word_count"]["System 1"].append(len(summary1.split()))
metrics["word_count"]["System 2"].append(len(summary2.split()))

metrics["sentence_count"]["System 1"].append(len(sent_tokenize(summary1)))
metrics["sentence_count"]["System 2"].append(len(sent_tokenize(summary2)))

# Vocabulary richness
tokens1 = word_tokenize(summary1.lower())
tokens2 = word_tokenize(summary2.lower())

if tokens1:
metrics["vocabulary_richness"]["System 1"].append(len(set(tokens1)) / len(tokens1))
else:
metrics["vocabulary_richness"]["System 1"].append(0)

if tokens2:
metrics["vocabulary_richness"]["System 2"].append(len(set(tokens2)) / len(tokens2))
else:
metrics["vocabulary_richness"]["System 2"].append(0)

# ROUGE scores
try:
rouge_scores = rouge_scorer_obj.score(summary1, summary2)
metrics["rouge1_f"].append(rouge_scores["rouge1"].fmeasure)
metrics["rouge2_f"].append(rouge_scores["rouge2"].fmeasure)
metrics["rougeL_f"].append(rouge_scores["rougeL"].fmeasure)
except Exception as e:
print(f"Error calculating ROUGE scores for {file_id}: {e}")
metrics["rouge1_f"].append(0)
metrics["rouge2_f"].append(0)
metrics["rougeL_f"].append(0)

# Semantic similarity
try:
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform([summary1, summary2])
metrics["semantic_similarity"].append(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
except Exception as e:
print(f"Error calculating semantic similarity for {file_id}: {e}")
metrics["semantic_similarity"].append(0)

# Check if we have any valid metrics
if not metrics["word_count"]["System 1"]:
print("Error: No valid metrics could be calculated after filtering empty summaries.")
return None

# Calculate means for each metric
summary = {
"word_count": {
"System 1": np.mean(metrics["word_count"]["System 1"]),
"System 2": np.mean(metrics["word_count"]["System 2"])
},
"sentence_count": {
"System 1": np.mean(metrics["sentence_count"]["System 1"]),
"System 2": np.mean(metrics["sentence_count"]["System 2"])
},
"vocabulary_richness": {
"System 1": np.mean(metrics["vocabulary_richness"]["System 1"]),
"System 2": np.mean(metrics["vocabulary_richness"]["System 2"])
},
"rouge1_f": np.mean(metrics["rouge1_f"]),
"rouge2_f": np.mean(metrics["rouge2_f"]),
"rougeL_f": np.mean(metrics["rougeL_f"]),
"semantic_similarity": np.mean(metrics["semantic_similarity"])
}

# Create radar chart
categories = [
"Word Count",
"Sentence Count",
"Vocabulary Richness",
"ROUGE-1",
"ROUGE-2",
"ROUGE-L",
"Semantic Similarity"
]

# Normalize values for radar chart with reasonable caps
max_word_count = max(summary["word_count"]["System 1"], summary["word_count"]["System 2"])
max_word_count = min(max_word_count, 500) # Cap at reasonable maximum

max_sentence_count = max(summary["sentence_count"]["System 1"], summary["sentence_count"]["System 2"])
max_sentence_count = min(max_sentence_count, 30) # Cap at reasonable maximum

# Get values for System 1
system1_values = [
min(summary["word_count"]["System 1"] / max_word_count, 1.0),
min(summary["sentence_count"]["System 1"] / max_sentence_count, 1.0),
summary["vocabulary_richness"]["System 1"],
summary["rouge1_f"],
summary["rouge2_f"],
summary["rougeL_f"],
summary["semantic_similarity"]
]

# Get values for System 2
system2_values = [
min(summary["word_count"]["System 2"] / max_word_count, 1.0),
min(summary["sentence_count"]["System 2"] / max_sentence_count, 1.0),
summary["vocabulary_richness"]["System 2"],
summary["rouge1_f"],
summary["rouge2_f"],
summary["rougeL_f"],
summary["semantic_similarity"]
]

# Ensure values are in range [0,1] for radar chart
system1_values = [min(max(0, v), 1) for v in system1_values]
system2_values = [min(max(0, v), 1) for v in system2_values]

# Number of variables
N = len(categories)

# Create angles for each metric
angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1] # Close the loop

# Add values for the loop closure
system1_values += system1_values[:1]
system2_values += system2_values[:1]

# Create radar chart
plt.figure(figsize=(14, 12))
ax = plt.subplot(111, polar=True)

# Plot System 1
ax.plot(angles, system1_values, 'o-', linewidth=2, label="System 1", color="#3498db")
ax.fill(angles, system1_values, alpha=0.25, color="#3498db")

# Plot System 2
ax.plot(angles, system2_values, 'o-', linewidth=2, label="System 2", color="#e74c3c")
ax.fill(angles, system2_values, alpha=0.25, color="#e74c3c")

# Set labels and formatting
plt.xticks(angles[:-1], categories, size=14)

# Improve label positioning to avoid overlap
for label, angle in zip(ax.get_xticklabels(), angles[:-1]):
if angle < np.pi/2 or angle > 3*np.pi/2:
label.set_horizontalalignment('left')
else:
label.set_horizontalalignment('right')

ax.set_title("Transcript Summarizer Comparison", size=20, pad=20)

# Add axis labels with actual values
for i, angle in enumerate(angles[:-1]):
if i == 0: # Word count
ax.text(angle, 1.1, f"Max: {int(max_word_count)} words",
ha='center', va='center', size=10)
elif i == 1: # Sentence count
ax.text(angle, 1.1, f"Max: {int(max_sentence_count)} sentences",
ha='center', va='center', size=10)

# Add legend with metrics
legend = plt.legend(loc="upper right", bbox_to_anchor=(0.1, 0.1))

# Add a text box with key statistics
textstr = '\n'.join((
f"Word Count: System 1 = {int(summary['word_count']['System 1'])}, System 2 = {int(summary['word_count']['System 2'])}",
f"Sentence Count: System 1 = {summary['sentence_count']['System 1']:.1f}, System 2 = {summary['sentence_count']['System 2']:.1f}",
f"Vocabulary Richness: System 1 = {summary['vocabulary_richness']['System 1']:.3f}, System 2 = {summary['vocabulary_richness']['System 2']:.3f}",
f"Semantic Similarity: {summary['semantic_similarity']:.3f}",
f"ROUGE-1: {summary['rouge1_f']:.3f}",
f"ROUGE-2: {summary['rouge2_f']:.3f}",
f"ROUGE-L: {summary['rougeL_f']:.3f}"
))

# Create a text box at the bottom
plt.figtext(0.5, 0.01, textstr, ha="center", fontsize=12,
bbox={"facecolor":"white", "alpha":0.8, "pad":5, "boxstyle":"round,pad=0.5"})

plt.tight_layout()

# Save the visualization
try:
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"Radar chart saved to {output_path}")
except Exception as e:
print(f"Error saving radar chart: {e}")
finally:
plt.close()

return summary

def main():
parser = argparse.ArgumentParser(description="Compare two summarizer systems with a single visualization")
parser.add_argument("--summaries1", required=True, help="Directory containing summaries from system 1")
parser.add_argument("--summaries2", required=True, help="Directory containing summaries from system 2")
parser.add_argument("--output_dir", default="./comparison_results", help="Directory to save comparison results")

args = parser.parse_args()

# Create output directory
output_dir = Path(args.output_dir)
output_dir.mkdir(exist_ok=True, parents=True)

# Load summaries
print("Loading summaries...")
system1_summaries = load_summaries(args.summaries1)
system2_summaries = load_summaries(args.summaries2)

if not system1_summaries:
print(f"Error: No valid summaries found in {args.summaries1}")
return

if not system2_summaries:
print(f"Error: No valid summaries found in {args.summaries2}")
return

# Create the comparison chart
print("Creating comparison visualization...")
summary = create_comparison_radar_chart(
system1_summaries,
system2_summaries,
output_dir / "summarizer_comparison_radar.png"
)

if summary:
# Save the summary data
try:
with open(output_dir / "comparison_summary.json", "w") as f:
json.dump(summary, f, indent=2)
print(f"Comparison complete! Results saved to {output_dir}")
except Exception as e:
print(f"Error saving summary data: {e}")
else:
print("Comparison failed. Please check the error messages above.")

if __name__ == "__main__":
main()
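
A typical invocation of this comparison tool, with placeholder directory names for the two systems' `.txt` summaries, might look like:

```bash
# Directory names below are hypothetical placeholders.
python modeling/comparison.py \
    --summaries1 eval/bart_summaries \
    --summaries2 eval/gemma3_summaries \
    --output_dir comparison_results
```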