diff --git a/.gitmodules b/.gitmodules
index b0a66e4..e4e3b30 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
[submodule "bin/scripts/downloadpublicdata"]
path = bin/scripts/downloadpublicdata
url = https://github.com/Wang-Bioinformatics-Lab/downloadpublicdata.git
+[submodule "bin/NextflowModules"]
+ path = bin/NextflowModules
+ url = https://github.com/Wang-Bioinformatics-Lab/NextflowModules.git
diff --git a/Makefile b/Makefile
index 8c0d434..1edcc5d 100644
--- a/Makefile
+++ b/Makefile
@@ -6,4 +6,7 @@ run_usi_download:
--download_usi_filename=./data/usi_files/input_files.tsv --cache_directory=./data/cache
run_transitive:
- nextflow run ./nf_workflow.nf --resume -c nextflow.config --topology=transitive
\ No newline at end of file
+ nextflow run ./nf_workflow.nf --resume -c nextflow.config --topology=transitive
+
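+# Fetch the pinned NextflowModules submodule (run once after cloning)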
+init_modules:
+ git submodule update --init --recursive
\ No newline at end of file
diff --git a/bin/NextflowModules b/bin/NextflowModules
new file mode 160000
index 0000000..f357f3a
--- /dev/null
+++ b/bin/NextflowModules
@@ -0,0 +1 @@
+Subproject commit f357f3ae993553b011347a5af9d46d9318ff7b20
diff --git a/bin/conda_env_falcon.yml b/bin/conda_env_falcon.yml
new file mode 100644
index 0000000..1bb31cf
--- /dev/null
+++ b/bin/conda_env_falcon.yml
@@ -0,0 +1,21 @@
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+channel_priority: strict
+dependencies:
+ - python=3.10
+ - pandas=2.2.0
+ - pyteomics=4.7.1
+ - pip:
+ - numpy==1.23.4
+ - xmltodict
+ - requests
+ - tqdm
+ - psutil
+ - pyopenms==3.0.0
+ - spectrum-utils==0.3.5
+ - falcon-ms
+ - pymzml
+ - numcodecs
+ - pyarrow
diff --git a/bin/scripts/convert_falcon_to_mscluster_format.py b/bin/scripts/convert_falcon_to_mscluster_format.py
new file mode 100755
index 0000000..e1310f7
--- /dev/null
+++ b/bin/scripts/convert_falcon_to_mscluster_format.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import argparse
+import pandas as pd
+
+
+
+def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_clusterinfo, output_clustersummary, min_cluster_size=2):
+ """
+ Convert falcon output to mscluster format.
+
+ Falcon format: cluster, filename, scan, precursor_mz, retention_time, new_batch
+ MSCluster format: #ClusterIdx, #Filename, #SpecIdx, #Scan, #ParentMass, #Charge, #RetTime, #PrecIntensity
+
+ Note: If falcon doesn't have certain fields (like charge, intensity), we use default values (0)
+ instead of fetching from MGF files.
+ """
+ # Load falcon CSV
+ clusterinfo_df = pd.read_csv(falcon_csv, sep=',', comment='#')
+
+ print(f"Loaded {len(clusterinfo_df)} rows from falcon CSV")
+ print(f"Columns: {clusterinfo_df.columns.tolist()}")
+
+
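+    # If falcon reports USI-style identifiers instead of explicit filename/scan
+    # columns, recover them positionally. The layout assumed here (an assumption
+    # about the upstream USI format) is "mzspec:<task>:<filename>:scan:<scan>".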
+ if 'identifier' in clusterinfo_df.columns:
+
+ clusterinfo_df["filename"] = clusterinfo_df["identifier"].apply(
+ lambda x: x.split(":")[2] + ".mzML" if len(x.split(":")) > 2 else (x.split(":")[-2] + ".mzML" if len(x.split(":")) > 1 else "unknown.mzML")
+ )
+ clusterinfo_df["scan"] = clusterinfo_df["identifier"].apply(
+ lambda x: int(x.split(":")[-1]) if x.split(":")[-1].isdigit() else 0
+ )
+
+ # Ensure we have the required columns
+ required_cols = ['cluster', 'filename', 'scan', 'precursor_mz', 'retention_time']
+ missing_cols = [col for col in required_cols if col not in clusterinfo_df.columns]
+
+ if missing_cols:
+ print(f"ERROR: Required columns not found in falcon CSV: {missing_cols}")
+ print(f"Available columns: {clusterinfo_df.columns.tolist()}")
+ print(f"First few rows:")
+ print(clusterinfo_df.head())
+ sys.exit(1)
+
+ if min_cluster_size > 1:
+ clusterinfo_df = clusterinfo_df[clusterinfo_df['cluster'] != -1]
+
+ # Convert to mscluster format
+ mscluster_rows = []
+ spec_idx_counter = 0
+
+ for idx, row in clusterinfo_df.iterrows():
+ cluster_idx = int(row['cluster'])
+
+ # Skip singletons if min_cluster_size > 1
+ if cluster_idx == -1 and min_cluster_size > 1:
+ continue
+
+        # Handle cluster indexing: falcon uses 0-based, mscluster uses 1-based.
+        # We also need to handle -1 (singletons).
+        if cluster_idx == -1:
+            # Sentinel for singletons. Note that all singletons share this index,
+            # so this path is only safe when they are filtered out (min_cluster_size > 1).
+            cluster_idx = 999999
+ else:
+ cluster_idx = cluster_idx + 1 # Convert to 1-based
+
+ filename = str(row['filename'])
+ scan = int(row['scan'])
+ precursor_mz = float(row['precursor_mz'])
+ retention_time = float(row['retention_time'])
+
+ # Use default values for fields that falcon doesn't provide
+ # Don't fetch from MGF files - just use defaults
+ charge = 0
+ precursor_intensity = 0.0
+
+
+ if 'precursor_charge' in row and pd.notna(row['precursor_charge']):
+ try:
+ charge = int(row['precursor_charge'])
+ except (ValueError, TypeError):
+ charge = 0
+ elif 'charge' in row and pd.notna(row['charge']):
+ try:
+ charge = int(row['charge'])
+ except (ValueError, TypeError):
+ charge = 0
+
+ if 'precursor_intensity' in row and pd.notna(row['precursor_intensity']):
+ try:
+ precursor_intensity = float(row['precursor_intensity'])
+ except (ValueError, TypeError):
+ precursor_intensity = 0.0
+
+        # Convert retention time to seconds if it appears to be in minutes
+        # (mscluster expects seconds). Heuristic: values below 1000 are treated
+        # as minutes, which can misclassify genuine second-valued RTs on short runs.
+        if retention_time > 0 and retention_time < 1000:  # likely minutes
+            retention_time = retention_time * 60.0
+
+ if not filename.startswith('input_spectra/'):
+ filename = f"input_spectra/{filename}"
+
+ spec_idx = spec_idx_counter
+ spec_idx_counter += 1
+
+ mscluster_row = {
+ '#ClusterIdx': cluster_idx,
+ '#Filename': filename,
+ '#SpecIdx': spec_idx,
+ '#Scan': scan,
+ '#ParentMass': precursor_mz,
+ '#Charge': charge,
+ '#RetTime': retention_time,
+ '#PrecIntensity': precursor_intensity
+ }
+ mscluster_rows.append(mscluster_row)
+
+ # Create DataFrame
+ mscluster_df = pd.DataFrame(mscluster_rows)
+
+    # Before filtering, record the original falcon cluster ID (0-based) for each
+    # row. cluster_idx was shifted to 1-based above, so original = #ClusterIdx - 1.
+    # Note: falcon_wrapper.py rebuilds the falcon -> sequential mapping directly
+    # from falcon.csv, so this column is bookkeeping only and is dropped below.
+    mscluster_df['_original_falcon_cluster'] = mscluster_df['#ClusterIdx'] - 1
+
+ # Filter by min_cluster_size
+ if min_cluster_size > 1:
+ # Count spectra per cluster
+ cluster_counts = mscluster_df['#ClusterIdx'].value_counts()
+ valid_clusters = cluster_counts[cluster_counts >= min_cluster_size].index
+ mscluster_df = mscluster_df[mscluster_df['#ClusterIdx'].isin(valid_clusters)]
+
+ # IMPORTANT: Remap cluster indices to sequential (1, 2, 3, ...) for consistency
+ # This ensures clusterinfo.tsv, clustersummary.tsv, and MGF SCANS all use the same sequential indices
+ # This is necessary for compatibility with ExecMolecularParallelPairs which uses index-based CLUSTERID
+ original_clusters = sorted(mscluster_df['#ClusterIdx'].unique())
+ cluster_remap = {orig: new for new, orig in enumerate(original_clusters, start=1)}
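+    # e.g. if the surviving 1-based indices are [3, 7, 12], cluster_remap
+    # becomes {3: 1, 7: 2, 12: 3}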
+
+    # Mapping: original falcon cluster (0-based) -> sequential index (1-based).
+    # falcon_wrapper.py rebuilds this same mapping from falcon.csv rather than
+    # consuming it from here, so it is kept for reference/debugging only.
+ falcon_cluster_to_sequential = {}
+ for orig_cluster_idx in original_clusters:
+ original_falcon_cluster = orig_cluster_idx - 1 # Convert back to 0-based
+ sequential_idx = cluster_remap[orig_cluster_idx]
+ falcon_cluster_to_sequential[original_falcon_cluster] = sequential_idx
+
+ # Remap #ClusterIdx in mscluster_df
+ mscluster_df['#ClusterIdx'] = mscluster_df['#ClusterIdx'].map(cluster_remap)
+
+ # Remove the temporary column
+ mscluster_df = mscluster_df.drop(columns=['_original_falcon_cluster'])
+
+ # Create cluster summary with remapped indices
+ cluster_summary_rows = []
+ for cluster_idx in sorted(mscluster_df['#ClusterIdx'].unique()):
+ cluster_data = mscluster_df[mscluster_df['#ClusterIdx'] == cluster_idx]
+ num_spectra = len(cluster_data)
+
+ # Calculate mean RT (in minutes)
+ mean_rt = cluster_data['#RetTime'].mean() / 60.0
+
+ # Calculate parent mass (mean of #ParentMass)
+ parent_mass = cluster_data['#ParentMass'].mean()
+
+ # Calculate precursor mass (parent mass / charge, then take mean)
+ # If charge is 0, use parent mass directly
+ precursor_masses = []
+ for idx, row in cluster_data.iterrows():
+ if row['#Charge'] > 0:
+ precursor_masses.append(row['#ParentMass'] / row['#Charge'])
+ else:
+ precursor_masses.append(row['#ParentMass'])
+ precursor_mass = sum(precursor_masses) / len(precursor_masses) if precursor_masses else parent_mass
+
+ # Calculate precursor charge (most common charge, or mean if no clear mode)
+ charges = cluster_data['#Charge'].values
+ charges_nonzero = charges[charges > 0]
+ if len(charges_nonzero) > 0:
+ # Use mode (most common charge) using pandas
+ charge_series = pd.Series(charges_nonzero)
+ mode_values = charge_series.mode()
+ if len(mode_values) > 0:
+ precursor_charge = int(mode_values.iloc[0])
+ else:
+ precursor_charge = int(charges_nonzero.mean())
+ else:
+ precursor_charge = 0
+
+ # Calculate sum of precursor intensity
+ sum_precursor_intensity = cluster_data['#PrecIntensity'].sum()
+
+ cluster_summary_row = {
+ 'cluster index': cluster_idx, # Already remapped to sequential
+ 'number of spectra': num_spectra,
+ 'parent mass': parent_mass,
+ 'precursor charge': precursor_charge,
+ 'precursor mass': precursor_mass,
+ 'sum(precursor intensity)': sum_precursor_intensity,
+ 'RTMean': mean_rt
+ }
+ cluster_summary_rows.append(cluster_summary_row)
+
+ cluster_summary_df = pd.DataFrame(cluster_summary_rows)
+ cluster_summary_df = cluster_summary_df.sort_values('cluster index')
+
+ # Ensure cluster index is string type to match network graph node types
+ # Network graph nodes from pairs file (CLUSTERID1/CLUSTERID2) are typically strings
+ # This ensures type matching when add_clusterinfo_summary_to_graph checks "if cluster_index in G"
+ cluster_summary_df['cluster index'] = cluster_summary_df['cluster index'].astype(str)
+
+ # Save outputs
+ mscluster_df.to_csv(output_clusterinfo, sep='\t', index=False)
+ cluster_summary_df.to_csv(output_clustersummary, sep='\t', index=False)
+
+ print(f"Converted {len(mscluster_df)} spectra in {len(cluster_summary_df)} clusters")
+ print(f"Saved clusterinfo to {output_clusterinfo}")
+ print(f"Saved clustersummary to {output_clustersummary}")
+ print(f"Note: Cluster indices have been remapped to sequential (1, 2, 3, ...) for consistency")
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Convert Falcon output to MSCluster format')
+ parser.add_argument('falcon_csv', help='Falcon CSV output file')
+ parser.add_argument('input_spectra_folder', help='Input spectra folder (for reference, not used for fetching data)')
+ parser.add_argument('output_clusterinfo', help='Output clusterinfo.tsv file')
+ parser.add_argument('output_clustersummary', help='Output clustersummary.tsv file')
+ parser.add_argument('--min_cluster_size', type=int, default=2, help='Minimum cluster size')
+
+ args = parser.parse_args()
+
+ convert_falcon_to_mscluster_format(
+ args.falcon_csv,
+ args.input_spectra_folder,
+ args.output_clusterinfo,
+ args.output_clustersummary,
+ args.min_cluster_size
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/scripts/falcon_wrapper.py b/bin/scripts/falcon_wrapper.py
new file mode 100755
index 0000000..f984a71
--- /dev/null
+++ b/bin/scripts/falcon_wrapper.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import glob
+import argparse
+import subprocess
+import pandas as pd
+
+def run_falcon(input_spectra_path, output_prefix="falcon",
+ precursor_tol="20 ppm", fragment_tol=0.05,
+ min_mz_range=0, min_mz=0, max_mz=30000, eps=0.1):
+ """
+ Runs Falcon with specified parameters.
+ input_spectra_path can be a file or a folder.
+ """
+ # Check if input is a file or folder
+ if os.path.isfile(input_spectra_path):
+ # Single file input
+ all_spectrum_files = [input_spectra_path]
+ elif os.path.isdir(input_spectra_path):
+ # Folder input - list all spectrum files
+ all_mgf_files = glob.glob(os.path.join(input_spectra_path, "*.mgf"))
+ all_mzxml_files = glob.glob(os.path.join(input_spectra_path, "*.mzXML"))
+ all_mzml_files = glob.glob(os.path.join(input_spectra_path, "*.mzML"))
+
+ all_spectrum_files = all_mgf_files + all_mzxml_files + all_mzml_files
+
+ if len(all_spectrum_files) == 0:
+ print(f"ERROR: No spectrum files found in {input_spectra_path}")
+ sys.exit(1)
+
+ # Sort these filenames
+ all_spectrum_files.sort()
+ else:
+ print(f"ERROR: Input path does not exist: {input_spectra_path}")
+ sys.exit(1)
+
+ # Create pattern for falcon (it accepts wildcards or file list)
+ if len(all_spectrum_files) == 1:
+ mzml_pattern = all_spectrum_files[0]
+ else:
+        # Falcon accepts multiple input files; pass them space-separated
+        # (paths containing spaces would need quoting here)
+        mzml_pattern = " ".join(all_spectrum_files)
+
+    # Parse precursor_tol - falcon's --precursor_tol takes two arguments:
+    # a tolerance value and a unit ("ppm" or "Da"), e.g. "--precursor_tol 20 ppm".
+    # Accept either "20 ppm" (value and unit) or a bare number like "2.0".
+    precursor_tol_str = str(precursor_tol).strip()
+    if " " in precursor_tol_str:
+        # Already in "value unit" form such as "20 ppm" or "2.0 Da"
+        value, unit = precursor_tol_str.split(None, 1)
+        precursor_tol_args = f"{value} {unit}"
+    else:
+        # Bare number: assume the unit is Da
+        try:
+            tol_value = float(precursor_tol_str)
+            precursor_tol_args = f"{tol_value} Da"
+        except ValueError:
+            # Unparseable input: fall back to the 20 ppm default used elsewhere in this wrapper
+            precursor_tol_args = "20 ppm"
+
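+    # hash_len / n_neighbors_ann / n_probe / batch_size below tune falcon's
+    # approximate nearest-neighbor indexing; they are fixed here, and only the
+    # tolerances, m/z range, and eps are exposed as workflow parameters.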
+ command = (
+ f"falcon {mzml_pattern} {output_prefix} "
+ f"--export_representatives "
+ f"--precursor_tol {precursor_tol_args} "
+ f"--fragment_tol {fragment_tol} "
+ f"--min_mz_range {min_mz_range} "
+ f"--min_mz {min_mz} --max_mz {max_mz} "
+ f"--eps {eps} "
+ f"--hash_len 400 "
+ f"--n_neighbors_ann 64 "
+ f"--n_probe 16 "
+ f"--batch_size 32768 "
+ )
+ print(f"[run_falcon] Running: {command}")
+ process = subprocess.Popen(command, shell=True)
+ retcode = process.wait()
+ if retcode != 0:
+ print(f"ERROR: Falcon failed with exit code {retcode}")
+ sys.exit(1)
+
+
+def main():
+ # Parse arguments
+ parser = argparse.ArgumentParser(description='Falcon Clustering Wrapper')
+ parser.add_argument('input_spectra_folder', help='Input Spectra Folder')
+ parser.add_argument('output_spectra_folder', help='Output Spectra Folder')
+ parser.add_argument('final_output_folder', help='final_output_folder')
+
+ parser.add_argument('--min_cluster_size', default="2", help='min_cluster_size (not used by falcon directly)')
+
+ parser.add_argument('--pm_tolerance', default="20 ppm", help='pm_tolerance (precursor tolerance)')
+ parser.add_argument('--fragment_tolerance', default="0.05", help='fragment_tolerance')
+
+ # Filters (not all are used by falcon)
+ parser.add_argument('--min_peak_intensity', default="0.0", help='min_peak_intensity (not used by falcon)')
+ parser.add_argument('--window_filter', default="1", help='window_filter (not used by falcon)')
+ parser.add_argument('--precursor_filter', default="1", help='precursor_filter (not used by falcon)')
+
+ # Falcon-specific parameters
+ parser.add_argument('--eps', default="0.1", help='Falcon eps parameter')
+ parser.add_argument('--min_mz', default="0", help='Falcon min_mz parameter')
+ parser.add_argument('--max_mz', default="30000", help='Falcon max_mz parameter')
+
+ args = parser.parse_args()
+
+ # Check if input is a file or folder (Nextflow may pass a file)
+ input_spectra_path = args.input_spectra_folder
+ if os.path.isfile(args.input_spectra_folder):
+ # If it's a single file, we need to use its directory
+ input_spectra_path = os.path.dirname(args.input_spectra_folder)
+ if not input_spectra_path:
+ input_spectra_path = "."
+
+ # Running falcon
+ output_prefix = "falcon"
+ run_falcon(input_spectra_path, output_prefix,
+ precursor_tol=args.pm_tolerance,
+ fragment_tol=float(args.fragment_tolerance),
+ min_mz_range=0,
+ min_mz=int(args.min_mz),
+ max_mz=int(args.max_mz),
+ eps=float(args.eps))
+
+ # Falcon outputs:
+ # - falcon.csv (cluster assignments)
+ # - falcon.mgf (representative spectra)
+
+ falcon_csv = f"{output_prefix}.csv"
+ falcon_mgf = f"{output_prefix}.mgf"
+
+ if not os.path.exists(falcon_csv):
+ print(f"ERROR: Falcon output {falcon_csv} not found")
+ sys.exit(1)
+
+ if not os.path.exists(falcon_mgf):
+ print(f"ERROR: Falcon output {falcon_mgf} not found")
+ sys.exit(1)
+
+ # Convert falcon output to mscluster format first
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ convert_script = os.path.join(script_dir, "convert_falcon_to_mscluster_format.py")
+
+ clusterinfo_file = os.path.join(args.final_output_folder, "clusterinfo.tsv")
+ clustersummary_file = os.path.join(args.final_output_folder, "clustersummary.tsv")
+
+ # For conversion, we need the original input path to get spectrum info
+ convert_input_path = input_spectra_path
+
+ convert_cmd = (
+ f"python {convert_script} "
+ f"{falcon_csv} "
+ f"{convert_input_path} "
+ f"{clusterinfo_file} "
+ f"{clustersummary_file} "
+ f"--min_cluster_size {args.min_cluster_size}"
+ )
+
+ print(f"Converting falcon output to mscluster format...")
+ print(f"Running: {convert_cmd}")
+ ret_code = os.system(convert_cmd)
+
+ if ret_code != 0:
+ print(f"ERROR: Conversion failed with exit code {ret_code}")
+ sys.exit(1)
+
+ # Now update MGF file with correct scan numbers based on clusterinfo.tsv
+ # IMPORTANT: clusterinfo.tsv now uses sequential indices (1, 2, 3, ...)
+ # The scan number in MGF should match the #ClusterIdx in clusterinfo.tsv (which is already sequential)
+ # Read clusterinfo to get cluster indices
+ clusterinfo_df = pd.read_csv(clusterinfo_file, sep='\t')
+
+ # Get unique cluster indices (these are already sequential: 1, 2, 3, ...)
+ unique_clusters = sorted(clusterinfo_df['#ClusterIdx'].unique())
+
+ # Read falcon MGF file - parse manually to access cluster field
+ output_mgf_filename = os.path.join(args.final_output_folder, "specs_ms.mgf")
+ os.makedirs(args.final_output_folder, exist_ok=True)
+
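+    # A falcon --export_representatives MGF entry is expected to look like:
+    #   BEGIN IONS
+    #   TITLE=...
+    #   CLUSTER=17          <- falcon's 0-based cluster id (used below)
+    #   PEPMASS=301.1402
+    #   <mz> <intensity> peak lines
+    #   END IONS
+    # (layout assumed from the headers this parser and summarize_results_falcon.py rely on)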
+ # Parse falcon MGF to get cluster information and update scan numbers
+ mgf_spectra = []
+ with open(falcon_mgf, 'r') as f:
+ spec = None
+ for line in f:
+ line = line.strip()
+ if line == 'BEGIN IONS':
+ spec = {'peaks': [], 'headers': {}}
+ elif line == 'END IONS':
+ if spec is not None:
+ mgf_spectra.append(spec)
+ spec = None
+ elif spec is not None:
+ if '=' in line:
+ key, val = line.split('=', 1)
+ spec['headers'][key.upper()] = val
+ else:
+ # Peak data
+ parts = line.split()
+ if len(parts) == 2:
+ try:
+ mz, intensity = float(parts[0]), float(parts[1])
+ spec['peaks'].append((mz, intensity))
+                        except ValueError:
+                            # Skip malformed peak lines
+                            pass
+
+ # IMPORTANT: clusterinfo.tsv now uses sequential indices (1, 2, 3, ...)
+ # We need to map falcon clusters (0-based) to these sequential indices
+ # Rebuild the mapping by reading falcon.csv and matching with clusterinfo.tsv
+ # This avoids needing a separate JSON file that might not be passed through Nextflow
+
+ # Read falcon.csv to get original falcon cluster assignments
+ falcon_cluster_to_sequential = {}
+ if os.path.exists(falcon_csv):
+ falcon_df = pd.read_csv(falcon_csv, sep=',', comment='#')
+
+ # Filter by min_cluster_size (same as in convert_falcon_to_mscluster_format.py)
+ if int(args.min_cluster_size) > 1:
+ falcon_df = falcon_df[falcon_df['cluster'] != -1]
+
+ # Group by original falcon cluster (0-based)
+ # The sequential index in clusterinfo corresponds to the order after filtering
+ # We'll match by grouping falcon clusters and assigning sequential indices
+ original_clusters = sorted(falcon_df['cluster'].unique())
+ sequential_idx = 1
+ for orig_falcon_cluster in original_clusters:
+ # Count spectra in this cluster
+ cluster_spectra = falcon_df[falcon_df['cluster'] == orig_falcon_cluster]
+ if len(cluster_spectra) >= int(args.min_cluster_size):
+ falcon_cluster_to_sequential[orig_falcon_cluster] = sequential_idx
+ sequential_idx += 1
+
+ print(f"Rebuilt falcon cluster mapping: {len(falcon_cluster_to_sequential)} clusters")
+ else:
+ print(f"WARNING: falcon.csv not found at {falcon_csv}, cannot rebuild mapping")
+ falcon_cluster_to_sequential = {}
+
+ # Write MGF with sequential scan numbers matching clusterinfo.tsv
+ # Since clusterinfo.tsv already has sequential indices, we'll use them directly
+ skipped_count = 0
+ processed_clusters = set() # Track which sequential cluster indices we've processed
+
+ with open(output_mgf_filename, 'w') as out_mgf:
+ for spec in mgf_spectra:
+ # Get cluster ID from falcon MGF
+ falcon_cluster = -1
+ if 'CLUSTER' in spec['headers']:
+ try:
+ falcon_cluster = int(spec['headers']['CLUSTER'])
+ except (ValueError, TypeError):
+ pass
+
+ # Map falcon cluster (0-based) to sequential index (1-based)
+ if falcon_cluster in falcon_cluster_to_sequential:
+ sequential_idx = falcon_cluster_to_sequential[falcon_cluster]
+ scan_number = sequential_idx # Use sequential index as SCANS
+
+ # Write MGF entry with sequential scan number
+ out_mgf.write("BEGIN IONS\n")
+
+ # Write headers, updating SCANS= line
+ for key, val in spec['headers'].items():
+ if key == 'SCANS':
+ out_mgf.write(f"SCANS={scan_number}\n")
+ else:
+ out_mgf.write(f"{key}={val}\n")
+
+ # Add SCANS if it wasn't in the original
+ if 'SCANS' not in spec['headers']:
+ out_mgf.write(f"SCANS={scan_number}\n")
+
+ # Write peaks
+ for mz, intensity in spec['peaks']:
+ out_mgf.write(f"{mz} {intensity}\n")
+
+ out_mgf.write("END IONS\n\n")
+ processed_clusters.add(sequential_idx)
+ else:
+ # Skip clusters that are not in the mapping (filtered out by min_cluster_size)
+ skipped_count += 1
+ if falcon_cluster != -1: # Only warn for non-singleton clusters
+ print(f"INFO: Skipping falcon cluster {falcon_cluster} (filtered out by min_cluster_size or not in mapping)")
+
+ if skipped_count > 0:
+ print(f"INFO: Skipped {skipped_count} spectra that were filtered out by min_cluster_size")
+
+ print(f"Wrote {len(processed_clusters)} spectra to MGF file with sequential SCANS (1, 2, 3, ...)")
+
+ print(f"Falcon clustering completed. Output files:")
+ print(f" - MGF: {output_mgf_filename}")
+ print(f" - ClusterInfo: {clusterinfo_file}")
+ print(f" - ClusterSummary: {clustersummary_file}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bin/scripts/summarize_results_falcon.py b/bin/scripts/summarize_results_falcon.py
new file mode 100644
index 0000000..d6ec09e
--- /dev/null
+++ b/bin/scripts/summarize_results_falcon.py
@@ -0,0 +1,88 @@
+import os
+import pandas as pd
+import argparse
+import numpy as np
+
+def rewrite_falcon_mgf(mgf_input, mgf_output):
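+    # Rewrite falcon's "CLUSTER=<n>" header (0-based cluster id) as "SCANS=<n+1>"
+    # so the MGF scan numbers match the 1-based cluster indices written to
+    # clusterinfo.tsv / clustersummary.tsv.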
+ with open(mgf_input, 'r') as infile, open(mgf_output, 'w') as outfile:
+ for line in infile:
+ if line.startswith("CLUSTER="):
+ try:
+ cluster_num = int(line.strip().split("=")[1])
+ outfile.write(f"SCANS={cluster_num + 1}\n")
+ except ValueError:
+ outfile.write(line)
+ else:
+ outfile.write(line)
+
+def main():
+ parser = argparse.ArgumentParser(description='Summarizing Falcon Results')
+ parser.add_argument('falcon_clusters', help='falcon_clusters')
+ parser.add_argument('falcon_mgf', help='falcon_mgf')
+ #parser.add_argument('output_summary_folder', help='output_summary_folder')
+ args = parser.parse_args()
+
+ clusterinfo_df = pd.read_csv(args.falcon_clusters, sep=',', comment='#')
+
+ print(args)
+ print(clusterinfo_df)
+
+ clusterinfo_df = clusterinfo_df.sort_values(by='cluster', key=lambda x: x.replace(-1, np.inf))
+
+    # Filter out unclustered spectra (falcon assigns cluster == -1 to singletons)
+ clusterinfo_df = clusterinfo_df[clusterinfo_df["cluster"] != -1]
+
+
+ # Grouping by cluster
+ grouped_cluster_df = clusterinfo_df.groupby(["cluster"])
+ cluster_summary_list = []
+ for cluster, cluster_group_df in grouped_cluster_df:
+ #TODO :Read these from mgf, as the representative is a medoid
+
+ cluster_count = len(cluster_group_df)
+ cluster_mz = cluster_group_df["precursor_mz"].mean()
+ cluster_rt = cluster_group_df["retention_time"].mean()
+ cluster_charge = cluster_group_df["precursor_charge"].mean()
+        # Adjust the column names to match the classical MN workflow
+ output_dict = {}
+ output_dict["number of spectra"] = cluster_count
+ output_dict["parent mass"] = cluster_mz
+ output_dict["RTMean"] = cluster_rt
+ output_dict["precursor charge"] = cluster_charge
+ output_dict["cluster index"] = cluster[0] + 1
+
+ cluster_summary_list.append(output_dict)
+
+ # Creating a cluster summary
+ cluster_summary_df = pd.DataFrame(cluster_summary_list)
+ cluster_summary_df.to_csv("clustersummary.tsv", sep='\t', index=False)
+
+ # Creating cluster info
+ clusterinfo_df["filename"] = clusterinfo_df["identifier"].apply(lambda x: x.split(":")[2] + ".mzML")
+ clusterinfo_df["scan"] = clusterinfo_df["identifier"].apply(lambda x: x.split(":")[-1])
+
+ # Rename relevant columns to MS-Cluster format
+ clusterinfo_df = clusterinfo_df.rename(columns={
+ "filename": "#Filename",
+ "cluster": "#ClusterIdx",
+ "scan": "#Scan",
+ "precursor_mz": "#ParentMass",
+ "precursor_charge": "#Charge",
+ "retention_time": "#RetTime"
+ })
+
+    # Placeholder so downstream processes in the workflow don't raise an error
+ clusterinfo_df["#PrecIntensity"] = 0
+
+ # Select required columns
+ clusterinfo_df = clusterinfo_df[[
+ "#ClusterIdx", "#Filename", "#Scan", "#ParentMass", "#Charge", "#RetTime", "#PrecIntensity"
+ ]]
+ clusterinfo_df.to_csv("clusterinfo.tsv", sep='\t', index=False)
+
+ #TODO: Rewriting MGF files
+ rewrite_falcon_mgf(args.falcon_mgf, "specs_ms.mgf")
+    # TODO: Maybe make this compatible with FBMN, since it's already clustered.
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/nf_workflow.nf b/nf_workflow.nf
index 865f302..2316d93 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -13,12 +13,20 @@ params.metadata_per_file_grouping = "No" // Yes means that each file can be its
params.metadata_filename = "data/metadata.tsv"
// Clustering Parameters
+params.clustering_tool = "mscluster" // or "falcon"
params.min_cluster_size = "2"
// Tolerance Parameters
params.pm_tolerance = "2.0"
params.fragment_tolerance = "0.5"
+// Falcon-specific parameters
+params.falcon_pm_tolerance = "20 ppm"
+params.falcon_fragment_tolerance = "0.05"
+params.falcon_eps = "0.1"
+params.falcon_min_mz = "0"
+params.falcon_max_mz = "30000"
+
// Filtering
params.min_peak_intensity = "0.0"
params.window_filter = "1"
@@ -66,9 +74,29 @@ params.cache_directory = "data/cache"
params.publishdir = "$baseDir"
TOOL_FOLDER = "$baseDir/bin"
+MODULES_FOLDER = "$TOOL_FOLDER/NextflowModules"
+
+// COMPATIBILITY NOTE: The following might be necessary if this workflow is being deployed in a slightly different environment
+// checking if outdir is defined,
+// if so, then set publishdir to outdir
+if (params.outdir) {
+ _publishdir = params.outdir
+}
+else{
+ _publishdir = params.publishdir
+}
+
+// Augmenting with nf_output
+_publishdir = "${_publishdir}/nf_output"
+
+
+// Many useful modules are already implemented in NextflowModules; import them as needed.
+// publishdir is a keyword used across all our modules to control where output files are saved.
+include {summaryLibrary} from "$MODULES_FOLDER/nf_library_search_modules.nf"
+include {librarygetGNPSAnnotations} from "$MODULES_FOLDER/nf_library_search_modules.nf" addParams(publishdir: "$_publishdir/library")
process filesummary {
- publishDir "$params.publishdir/nf_output", mode: 'copy'
+ publishDir "$_publishdir", mode: 'copy'
conda "$TOOL_FOLDER/conda_env.yml"
@@ -113,6 +141,41 @@ process mscluster {
"""
}
+process falcon {
+ publishDir "$params.publishdir/nf_output", mode: 'copy'
+
+ conda "$TOOL_FOLDER/conda_env_falcon.yml"
+
+    // This is necessary because the conda environment's glibc libraries are not always picked up; linking defaults to the system libraries, which may be too old
+ beforeScript 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib'
+
+ input:
+ file inputSpectra
+ val ready
+
+ output:
+ file 'clustering/specs_ms.mgf'
+ file 'clustering/clusterinfo.tsv'
+ file 'clustering/clustersummary.tsv'
+
+ """
+ mkdir clustering
+ python $TOOL_FOLDER/scripts/falcon_wrapper.py \
+ $inputSpectra \
+ spectra \
+ clustering \
+ --min_cluster_size $params.min_cluster_size \
+ --pm_tolerance "$params.falcon_pm_tolerance" \
+ --fragment_tolerance $params.falcon_fragment_tolerance \
+ --min_peak_intensity $params.min_peak_intensity \
+ --window_filter $params.window_filter \
+ --precursor_filter $params.precursor_filter \
+ --eps $params.falcon_eps \
+ --min_mz $params.falcon_min_mz \
+ --max_mz $params.falcon_max_mz
+ """
+}
+
// TODO: Finish Implementing this, as this is currently an no-op
process massqlFilterSpectra {
publishDir "$params.publishdir/nf_output", mode: 'copy'
@@ -180,28 +243,28 @@ process librarymergeResults {
"""
}
-process librarygetGNPSAnnotations {
- publishDir "$params.publishdir/nf_output/library", mode: 'copy'
+// process librarygetGNPSAnnotations {
+// publishDir "$params.publishdir/nf_output/library", mode: 'copy'
- //cache 'lenient'
- cache 'false'
+// //cache 'lenient'
+// cache 'false'
- conda "$TOOL_FOLDER/conda_env.yml"
+// conda "$TOOL_FOLDER/conda_env.yml"
- input:
- path "merged_results.tsv"
- path "library_summary.tsv"
+// input:
+// path "merged_results.tsv"
+// path "library_summary.tsv"
- output:
- path 'merged_results_with_gnps.tsv'
+// output:
+// path 'merged_results_with_gnps.tsv'
- """
- python $TOOL_FOLDER/scripts/getGNPS_library_annotations.py \
- merged_results.tsv \
- merged_results_with_gnps.tsv \
- --librarysummary library_summary.tsv
- """
-}
+// """
+// python $TOOL_FOLDER/scripts/getGNPS_library_annotations.py \
+// merged_results.tsv \
+// merged_results_with_gnps.tsv \
+// --librarysummary library_summary.tsv
+// """
+// }
// Molecular Networking
process networkingGNPSPrepParams {
@@ -490,28 +553,6 @@ process prepInputFiles {
"""
}
-process summaryLibrary {
- publishDir "$params.publishdir/nf_output", mode: 'copy'
-
- maxForks 8
-
- cache 'lenient'
-
- conda "$TOOL_FOLDER/conda_env.yml"
-
- input:
- path library_file
-
- output:
- path '*.tsv' optional true
-
- """
- python $TOOL_FOLDER/scripts/library_summary.py \
- $library_file \
- ${library_file}.tsv
- """
-}
-
process createFeatureTable {
publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
@@ -535,7 +576,7 @@ process createFeatureTable {
}
-process PrepareForModiFinder{
+process prepareForModiFinder{
publishDir "$params.publishdir/nf_output", mode: 'copy'
errorStrategy 'ignore'
@@ -569,8 +610,19 @@ workflow {
// File summaries
filesummary(input_spectra_ch, _download_ready)
+    // Note: Subsequent processes (library search, networking) use pm_tolerance and
+    // fragment_tolerance, which default to the mscluster values. When using falcon, be
+    // aware that downstream processes keep those mscluster tolerances unless they are
+    // updated as well. This is acceptable because downstream processes operate on
+    // clustered spectra, and the library search / networking tolerances may legitimately
+    // differ from the clustering tolerance.
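+    // Example (hypothetical local run):
+    //   nextflow run ./nf_workflow.nf -c nextflow.config \
+    //     --clustering_tool falcon --falcon_pm_tolerance "20 ppm" --falcon_eps 0.1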
+
// Clustering
- (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
+ if(params.clustering_tool == "falcon"){
+ (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = falcon(input_spectra_ch, _download_ready)
+ }
+ else{
+ (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
+ }
if(params.massql_filter != "None"){
clustered_spectra_ch = massqlFilterSpectra(clustered_spectra_intermediate_ch)
@@ -590,10 +642,12 @@ workflow {
library_summary_ch = summaryLibrary(libraries_ch)
// Merging all these tsv files from library_summary_ch within nextflow
- library_summary_merged_ch = library_summary_ch.collectFile(name: "library_summary.tsv", keepHeader: true)
+ library_summary_merged_ch = library_summary_ch.collectFile(name: 'librarysummary.tsv', keepHeader: true, storeDir: _publishdir + "/librarysummary")
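+    // storeDir makes collectFile write the merged TSV straight into the publish directory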
library_summary_merged_ch = library_summary_merged_ch.ifEmpty(file("NO_FILE"))
- gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch)
+ // Getting library annotations
+ force_offline = "No" // This can be set to Yes to avoid any online queries to GNPS, which is useful for testing or if you have a local copy of the GNPS library
+ gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch, "1", "0", force_offline)
gnps_library_results_ch = gnps_library_results_ch.ifEmpty(file("NO_FILE"))
// Networking
@@ -650,6 +704,6 @@ workflow {
createFeatureTable(clusterinfo_ch)
// Preparing for Modifinder
- PrepareForModiFinder(gnps_library_results_ch, filtered_networking_pairs_enriched_ch)
+ prepareForModiFinder(gnps_library_results_ch, filtered_networking_pairs_enriched_ch)
}
\ No newline at end of file
diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml
index 6af7541..91a5c44 100644
--- a/workflowdisplay.yaml
+++ b/workflowdisplay.yaml
@@ -98,15 +98,27 @@ Views:
- title: "SpectrumID"
data: SpectrumID
- title: "Smiles"
- data: Smiles
+ data: Smiles
+ - title: "Library Name"
+ data: LibraryName
+ - title: "Library Scan"
+ data: LibScan
columnDefs: '[ {"targets": 0,"data": null,"render": function ( data, type, row, meta ) {
+ if (row["SpectrumID"] && row["SpectrumID"].includes("CCMSLIB")) {
+ return `
+ View Mirror
+ `;
+ }
return `
- View Mirror
+ View Mirror
`;}},
{"targets": 10,"data": null,"render": function ( data, type, row, meta ) {
return `
- `;}},]'
+ `;}},
+ {"targets": [11, 12], "visible": false}]'
+
+
- name: Network Components List
displayname: Network Components List
@@ -357,4 +369,4 @@ Views:
viewname: metadatadownload
displaytype: download
parameters:
- filename: nf_output/metadata/merged_metadata.tsv
\ No newline at end of file
+    filename: nf_output/metadata/merged_metadata.tsv
\ No newline at end of file
diff --git a/workflowinput.yaml b/workflowinput.yaml
index f35d1e1..f534392 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -1,7 +1,7 @@
workflowname: classical_networking_workflow
workflowdescription: This is Classical Molecular Networking for GNPS2
workflowlongdescription: This is Classical Molecular Networking for GNPS2
-workflowversion: "2026.01.21"
+workflowversion: "2026.02.06"
workflowfile: nf_workflow.nf
workflowautohide: false
adminonly: false
@@ -88,6 +88,17 @@ parameterlist:
- displayname: Clustering Parameters
paramtype: section
+ - displayname: Clustering Tool
+ paramtype: select
+ nf_paramname: clustering_tool
+ formvalue: "mscluster"
+ options:
+ - value: "mscluster"
+ display: "MSCluster"
+ - value: "falcon"
+ display: "Falcon"
+ tooltip: "Select the clustering tool to use for spectral clustering"
+
- displayname: Minimum Cluster Size (Or Disable Clustering)
paramtype: select
nf_paramname: min_cluster_size
@@ -105,6 +116,68 @@ parameterlist:
display: "4"
- value: "5"
display: "5"
+
+ - displayname: Falcon Parameters
+ paramtype: section
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
+
+ - displayname: Precursor Ion Tolerance
+ paramtype: text
+ nf_paramname: falcon_pm_tolerance
+ formplaceholder: Enter the pm_tolerance (e.g., 20 ppm or 2.0 Da)
+ formvalue: "20 ppm"
+ tooltip: "Precursor tolerance with unit (e.g., 20 ppm or 2.0 Da)"
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
+
+ - displayname: Fragment Ion Tolerance
+ paramtype: text
+ nf_paramname: falcon_fragment_tolerance
+ formplaceholder: Enter the fragment_tolerance
+ formvalue: "0.05"
+ tooltip: "Fragment tolerance (Da)"
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
+
+ - displayname: Falcon Epsilon (eps)
+ paramtype: text
+ nf_paramname: falcon_eps
+ formplaceholder: Enter the falcon_eps
+ formvalue: "0.1"
+ tooltip: "Falcon clustering epsilon parameter (only used when clustering_tool is falcon)"
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
+
+ - displayname: Falcon Min MZ
+ paramtype: text
+ nf_paramname: falcon_min_mz
+ formplaceholder: Enter the falcon_min_mz
+ formvalue: "0"
+ tooltip: "Falcon minimum m/z value (only used when clustering_tool is falcon)"
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
+
+ - displayname: Falcon Max MZ
+ paramtype: text
+ nf_paramname: falcon_max_mz
+ formplaceholder: Enter the falcon_max_mz
+ formvalue: "30000"
+ tooltip: "Falcon maximum m/z value (only used when clustering_tool is falcon)"
+ showif:
+ - condition:
+ - key: clustering_tool
+ value: "falcon"
- displayname: Advanced Filtering Parameters
paramtype: section