From 335df69a996246f7e9952479f89f49903021f5cf Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Wed, 23 Apr 2025 10:53:39 -0700
Subject: [PATCH 01/21] add falcon cluster method to the workflow

---
 bin/conda_env_falcon.yml                | 21 ++++++
 bin/scripts/summarize_results_falcon.py | 88 +++++++++++++++++++++++++
 nf_workflow.nf                          | 64 +++++++++++++++++-
 workflowinput.yaml                      | 10 +++
 4 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 bin/conda_env_falcon.yml
 create mode 100644 bin/scripts/summarize_results_falcon.py

diff --git a/bin/conda_env_falcon.yml b/bin/conda_env_falcon.yml
new file mode 100644
index 0000000..0fb7f39
--- /dev/null
+++ b/bin/conda_env_falcon.yml
@@ -0,0 +1,21 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+channel_priority: strict
+dependencies:
+  - python=3.10
+  - pandas=2.2.0
+  - pyteomics=4.7.1
+  - pip:
+    - numpy==1.23.4
+    - xmltodict
+    - requests
+    - tqdm
+    - psutil
+    - pyopenms==3.0.0
+    - spectrum-utils==0.3.5
+    - falcon-ms
+    - pymzml
+    - numcodecs
+    - pyarrow
\ No newline at end of file
diff --git a/bin/scripts/summarize_results_falcon.py b/bin/scripts/summarize_results_falcon.py
new file mode 100644
index 0000000..d6ec09e
--- /dev/null
+++ b/bin/scripts/summarize_results_falcon.py
@@ -0,0 +1,88 @@
+import os
+import pandas as pd
+import argparse
+import numpy as np
+
+def rewrite_falcon_mgf(mgf_input, mgf_output):
+    with open(mgf_input, 'r') as infile, open(mgf_output, 'w') as outfile:
+        for line in infile:
+            if line.startswith("CLUSTER="):
+                try:
+                    cluster_num = int(line.strip().split("=")[1])
+                    outfile.write(f"SCANS={cluster_num + 1}\n")
+                except ValueError:
+                    outfile.write(line)
+            else:
+                outfile.write(line)
+
+def main():
+    parser = argparse.ArgumentParser(description='Summarizing Falcon Results')
+    parser.add_argument('falcon_clusters', help='falcon_clusters')
+    parser.add_argument('falcon_mgf', help='falcon_mgf')
+    #parser.add_argument('output_summary_folder', help='output_summary_folder')
+    args = parser.parse_args()
+
+    clusterinfo_df = pd.read_csv(args.falcon_clusters, sep=',', comment='#')
+
+    print(args)
+    print(clusterinfo_df)
+
+    clusterinfo_df = clusterinfo_df.sort_values(by='cluster', key=lambda x: x.replace(-1, np.inf))
+
+    # Filtering out not in clusters data
+    clusterinfo_df = clusterinfo_df[clusterinfo_df["cluster"] != -1]
+
+
+    # Grouping by cluster
+    grouped_cluster_df = clusterinfo_df.groupby(["cluster"])
+    cluster_summary_list = []
+    for cluster, cluster_group_df in grouped_cluster_df:
+        #TODO :Read these from mgf, as the representative is a medoid
+
+        cluster_count = len(cluster_group_df)
+        cluster_mz = cluster_group_df["precursor_mz"].mean()
+        cluster_rt = cluster_group_df["retention_time"].mean()
+        cluster_charge = cluster_group_df["precursor_charge"].mean()
+        # adjust the col name to map the classical MN wokflow
+        output_dict = {}
+        output_dict["number of spectra"] = cluster_count
+        output_dict["parent mass"] = cluster_mz
+        output_dict["RTMean"] = cluster_rt
+        output_dict["precursor charge"] = cluster_charge
+        output_dict["cluster index"] = cluster[0] + 1
+
+        cluster_summary_list.append(output_dict)
+
+    # Creating a cluster summary
+    cluster_summary_df = pd.DataFrame(cluster_summary_list)
+    cluster_summary_df.to_csv("clustersummary.tsv", sep='\t', index=False)
+
+    # Creating cluster info
+    clusterinfo_df["filename"] = clusterinfo_df["identifier"].apply(lambda x: x.split(":")[2] + ".mzML")
+    clusterinfo_df["scan"] = clusterinfo_df["identifier"].apply(lambda x: x.split(":")[-1])
+
+    # Rename relevant columns to MS-Cluster format
+    clusterinfo_df = clusterinfo_df.rename(columns={
+        "filename": "#Filename",
+        "cluster": "#ClusterIdx",
+        "scan": "#Scan",
+        "precursor_mz": "#ParentMass",
+        "precursor_charge": "#Charge",
+        "retention_time": "#RetTime"
+    })
+
+    # Just to prevent other processes in the workflow raise error
+    clusterinfo_df["#PrecIntensity"] = 0
+
+    # Select required columns
+    clusterinfo_df = clusterinfo_df[[
+        "#ClusterIdx", "#Filename", "#Scan", "#ParentMass", "#Charge", "#RetTime",  "#PrecIntensity"
+    ]]
+    clusterinfo_df.to_csv("clusterinfo.tsv", sep='\t', index=False)
+
+    #TODO: Rewriting MGF files
+    rewrite_falcon_mgf(args.falcon_mgf, "specs_ms.mgf")
+    # TODO: Maybe make this compatible with FBMN, since its already clustered.
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/nf_workflow.nf b/nf_workflow.nf
index e56637a..0bc53d6 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -14,6 +14,8 @@ params.metadata_filename = "data/metadata.tsv"
 
 // Clustering Parameters
 params.min_cluster_size = "2"
+params.cluster_method = "MS-Cluster" // can be MS-Cluster or Falcon
+params.eps = "0.01" // eps for falcon
 
 // Tolerance Parameters
 params.pm_tolerance = "2.0"
@@ -113,6 +115,56 @@ process mscluster {
     """
 }
 
+process falcon {
+    publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
+
+    conda "$TOOL_FOLDER/conda_env_falcon.yml"
+
+    input:
+    val inputSpectra
+    val ready
+
+    output:
+    file 'falcon.mgf'
+    file 'falcon.csv'
+
+    """
+    mkdir clustering
+    falcon  \
+    ${inputSpectra} \
+    falcon --export_representatives \
+    --precursor_tol 20 ppm \
+    --fragment_tol 0.05 \
+    --min_mz_range 0 \
+    --min_mz 0 \
+    --max_mz 2000 \
+    --min_samples $params.min_cluster_size \
+    --eps $params.eps --work_dir clustering \
+    --overwrite
+    """
+}
+
+process summarize_falcon {
+    publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
+
+    conda "$TOOL_FOLDER/conda_env.yml"
+
+    input:
+    file input_csv
+    file input_mgf
+
+    output:
+    file 'clusterinfo.tsv'
+    file 'clustersummary.tsv'
+    file 'specs_ms.mgf'
+
+    """
+    python $TOOL_FOLDER/scripts/summarize_results_falcon.py \
+    ${input_csv} \
+    ${input_mgf}
+    """
+}
+
 // TODO: Finish Implementing this, as this is currently an no-op
 process massqlFilterSpectra {
     publishDir "$params.publishdir/nf_output", mode: 'copy'
@@ -559,7 +611,8 @@ process PrepareForModiFinder{
 }
 
 workflow {
-    // Preps input spectrum files
+
+// Preps input spectrum files
     input_spectra_ch = Channel.fromPath(params.input_spectra)
 
     // Downloads input data and lists privtae spectra
@@ -570,7 +623,14 @@ workflow {
     filesummary(input_spectra_ch, _download_ready)
 
     // Clustering
-    (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
+    if(params.cluster_method == "MS-Cluster"){
+        (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
+    }
+    else if (params.cluster_method == "Falcon"){
+        input_falcon_spectra_ch = params.input_spectra + "/*.mzML"
+        (clustered_spectra_falcon_ch, falcon_tsv_ch) = falcon(input_falcon_spectra_ch, _download_ready)
+        (clusterinfo_ch, clustersummary_ch, clustered_spectra_intermediate_ch) = summarize_falcon(falcon_tsv_ch, clustered_spectra_falcon_ch)
+    }
 
     if(params.massql_filter != "None"){
         clustered_spectra_ch = massqlFilterSpectra(clustered_spectra_intermediate_ch)
diff --git a/workflowinput.yaml b/workflowinput.yaml
index a0bec38..c637872 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -95,6 +95,16 @@ parameterlist:
       formvalue: "2"
       tooltip: "Set to 0 to turn clustering off, be careful with this setting"
 
+    - displayname: Clustering Methods
+      paramtype: select
+      nf_paramname: cluster_method
+      formvalue: "MS-Cluster"
+      options:
+        - value: "MS-Cluster"
+          display: "MS-Cluster"
+        - value: "Falcon"
+          display: "Falcon"
+
     - displayname: Advanced Filtering Parameters
       paramtype: section
 

From 916222b81228a3ef96a4d7a5e0c3349e2508a5af Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Wed, 23 Apr 2025 13:18:15 -0700
Subject: [PATCH 02/21] change the default settings for falcon and correct the
 display name for clustering tool

---
 nf_workflow.nf     | 2 +-
 workflowinput.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nf_workflow.nf b/nf_workflow.nf
index 0bc53d6..707569b 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -133,7 +133,7 @@ process falcon {
     falcon  \
     ${inputSpectra} \
     falcon --export_representatives \
-    --precursor_tol 20 ppm \
+    --precursor_tol 0.05 Da \
     --fragment_tol 0.05 \
     --min_mz_range 0 \
     --min_mz 0 \
diff --git a/workflowinput.yaml b/workflowinput.yaml
index c637872..e3b6d25 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -95,7 +95,7 @@ parameterlist:
       formvalue: "2"
       tooltip: "Set to 0 to turn clustering off, be careful with this setting"
 
-    - displayname: Clustering Methods
+    - displayname: Clustering Tool
       paramtype: select
       nf_paramname: cluster_method
       formvalue: "MS-Cluster"

From e30ba1e385a78af0e31b3098163a282b95812b19 Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Wed, 23 Apr 2025 14:27:31 -0700
Subject: [PATCH 03/21] Update the workflow version number to pin a version for
 Falcon

---
 workflowinput.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflowinput.yaml b/workflowinput.yaml
index e3b6d25..f296a92 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -1,7 +1,7 @@
 workflowname: classical_networking_workflow
 workflowdescription: This is Classical Molecular Networking for GNPS2
 workflowlongdescription: This is Classical Molecular Networking for GNPS2
-workflowversion: "2025.04.11"
+workflowversion: "2025.04.23"
 workflowfile: nf_workflow.nf
 workflowautohide: false
 adminonly: false

From 9960abbd72b3a3f2715f9de4b3966a2f78ebb48e Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Wed, 23 Apr 2025 15:56:32 -0700
Subject: [PATCH 04/21] fix the input file path for Falcon

---
 nf_workflow.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nf_workflow.nf b/nf_workflow.nf
index 707569b..071ef91 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -627,7 +627,7 @@ workflow {
         (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
     }
     else if (params.cluster_method == "Falcon"){
-        input_falcon_spectra_ch = params.input_spectra + "/*.mzML"
+        input_falcon_spectra_ch = params.input_spectra + "/*"
         (clustered_spectra_falcon_ch, falcon_tsv_ch) = falcon(input_falcon_spectra_ch, _download_ready)
         (clusterinfo_ch, clustersummary_ch, clustered_spectra_intermediate_ch) = summarize_falcon(falcon_tsv_ch, clustered_spectra_falcon_ch)
     }

From 913e06f938aa68d71d1b638638c4d07d2cb7a5ad Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Thu, 1 May 2025 10:01:55 -0700
Subject: [PATCH 05/21] updating

---
 workflowdisplay.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml
index c6200a0..f50f3b1 100644
--- a/workflowdisplay.yaml
+++ b/workflowdisplay.yaml
@@ -307,6 +307,15 @@ Views:
             -   matches_path:TASKLOCATION/[task]/nf_output/modifinder_input.csv
             -   with_library_search:"0"
 
+-   name: PostNetworking - MassQL Interactive Analysis
+    displayname: PostNetworking - MassQL Interactive Analysis
+    viewname: massqlpostmn
+    displaytype: linkout
+    parameters:
+        baseurl: "https://massqlpostmn.gnps2.org"
+        urlparam:
+            -   task_id:[task]
+
 -   name: section
     displayname: Download Network Files
     viewname: section

From 3b45ed688a2d85a6919fc1a3b63efd1349b3f091 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Mon, 7 Jul 2025 10:21:05 -0700
Subject: [PATCH 06/21] using dropdown for clustering

---
 workflowinput.yaml | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/workflowinput.yaml b/workflowinput.yaml
index f296a92..875bd7b 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -1,7 +1,7 @@
 workflowname: classical_networking_workflow
 workflowdescription: This is Classical Molecular Networking for GNPS2
 workflowlongdescription: This is Classical Molecular Networking for GNPS2
-workflowversion: "2025.04.23"
+workflowversion: "2026.01.13"
 workflowfile: nf_workflow.nf
 workflowautohide: false
 adminonly: false
@@ -87,13 +87,24 @@ parameterlist:
 
     - displayname: Clustering Parameters
       paramtype: section
-
-    - displayname: Min Cluster Size
-      paramtype: text
+    
+    - displayname: Minimum Cluster Size (Or Disable Clustering)
+      paramtype: select
       nf_paramname: min_cluster_size
-      formplaceholder: Enter the min_cluster_size
       formvalue: "2"
-      tooltip: "Set to 0 to turn clustering off, be careful with this setting"
+      options:
+        - value: "0"
+          display: "Clustering Off"
+        - value: "1"
+          display: "1"
+        - value: "2"
+          display: "2"
+        - value: "3"
+          display: "3"
+        - value: "4"
+          display: "4"
+        - value: "5"
+          display: "5"
 
     - displayname: Clustering Tool
       paramtype: select
@@ -135,7 +146,7 @@ parameterlist:
         - value: "0"
           display: "no"
       
-    - displayname: MassQL Filtering
+    - displayname: MassQL Filtering (EXPERIMENTAL)
       paramtype: text
       nf_paramname: massql_filter
       formplaceholder: Enter the massql_filter

From 96f733c0d015b41182fb854f54c74ff1b3704a83 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 6 Aug 2025 09:30:41 -0700
Subject: [PATCH 07/21] updating

---
 workflowdisplay.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml
index f50f3b1..6af7541 100644
--- a/workflowdisplay.yaml
+++ b/workflowdisplay.yaml
@@ -295,6 +295,16 @@ Views:
             -   input_spectra:TASKLOCATION/[task]/input_spectra
             -   description:Downstream from [task] Classical Molecular Networking
 
+-   name: PostNetworking - MassQL Interactive Analysis
+    displayname: PostNetworking - MassQL Interactive Analysis
+    viewname: massqlpostmn
+    displaytype: linkout
+    parameters:
+        baseurl: "https://massqlpostmn.gnps2.org"
+        urlparam:
+            -   task_id:[task]
+
+
 -   name: ModiFinder - Localize Analog Modifications
     displayname: ModiFinder - Localize Analog Modifications
     viewname: high_throughput_modifinder

From 46f43af1f72f438d33e03504c2001fd3756b00bb Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 8 Oct 2025 11:50:21 -0700
Subject: [PATCH 08/21] getting latest version

---
 GNPS2_DeploymentTooling | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GNPS2_DeploymentTooling b/GNPS2_DeploymentTooling
index a92a048..f0983fa 160000
--- a/GNPS2_DeploymentTooling
+++ b/GNPS2_DeploymentTooling
@@ -1 +1 @@
-Subproject commit a92a0488cd8032ad1274b58c01d6aed47fc7c7c6
+Subproject commit f0983facea2c0604db3113baafb916ba3065fc18

From d946cb0dc39d7c4eeb7386451837cd8bb504a591 Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Wed, 8 Oct 2025 13:04:43 -0700
Subject: [PATCH 09/21] fix similarity mode parameter name

---
 nf_workflow.nf     | 6 +++---
 workflowinput.yaml | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/nf_workflow.nf b/nf_workflow.nf
index 071ef91..7b47415 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -28,7 +28,7 @@ params.precursor_filter = "1"
 
 // Molecular Networking Options
 params.topology = "classic" // or can be transitive
-params.cal_all_pairs ='gnps' //or can be index
+params.similarity ='gnps' //or can be index
 
 params.parallelism = 24
 params.networking_min_matched_peaks = 6
@@ -657,11 +657,11 @@ workflow {
     gnps_library_results_ch = gnps_library_results_ch.ifEmpty(file("NO_FILE"))
 
     // Networking
-    if(params.cal_all_pairs == "gnps"){
+    if(params.similarity == "gnps"){
         params_ch = networkingGNPSPrepParams(clustered_spectra_ch)
         networking_results_temp_ch = calculatePairs(clustered_spectra_ch, params_ch.collect())
     }
-    else if (params.cal_all_pairs == "index"){
+    else if (params.similarity == "index"){
         networking_results_temp_ch = calculatePairs_index(clustered_spectra_ch)
     }
     merged_networking_pairs_ch = networking_results_temp_ch.collectFile(name: "merged_pairs.tsv", storeDir: "./nf_output/networking", keepHeader: true)
diff --git a/workflowinput.yaml b/workflowinput.yaml
index 875bd7b..86ee6a3 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -174,9 +174,9 @@ parameterlist:
       formplaceholder: Enter the networking_max_shift
       formvalue: "1999"
 
-    - displayname: all_pairwise_method
+    - displayname: Similarity Mode
       paramtype: select
-      nf_paramname: cal_all_pairs
+      nf_paramname: similarity
       formvalue: "gnps"
       options:
         - value: "gnps"

From c47ca4248ee3d849b80198f501490cedcecd887d Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Mon, 12 Jan 2026 10:14:04 -0800
Subject: [PATCH 10/21] Bump share code (#43)

* Bump ShareCode

* Bump version.
---
 GNPS_sharedcode | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GNPS_sharedcode b/GNPS_sharedcode
index 632fb36..b647ed6 160000
--- a/GNPS_sharedcode
+++ b/GNPS_sharedcode
@@ -1 +1 @@
-Subproject commit 632fb365c96d2942da8e5514cecceda8d22b288f
+Subproject commit b647ed604af57c88328b03f29a3ed44662db6919

From 0d4555a84d90547543e9f44c6ccebf080eed2103 Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Tue, 13 Jan 2026 12:04:17 -0800
Subject: [PATCH 11/21] add falcon clustering

---
 bin/conda_env_falcon.yml                      |  11 +-
 .../convert_falcon_to_mscluster_format.py     | 212 ++++++++++++++
 bin/scripts/falcon_wrapper.py                 | 273 ++++++++++++++++++
 nf_workflow.nf                                |  86 +++---
 workflowinput.yaml                            |  73 +++++
 5 files changed, 606 insertions(+), 49 deletions(-)
 create mode 100755 bin/scripts/convert_falcon_to_mscluster_format.py
 create mode 100755 bin/scripts/falcon_wrapper.py

diff --git a/bin/conda_env_falcon.yml b/bin/conda_env_falcon.yml
index 0fb7f39..d816410 100644
--- a/bin/conda_env_falcon.yml
+++ b/bin/conda_env_falcon.yml
@@ -5,17 +5,22 @@ channels:
 channel_priority: strict
 dependencies:
   - python=3.10
-  - pandas=2.2.0
+  - pandas=1.5.0
   - pyteomics=4.7.1
+  - libglib
+  - libstdcxx-ng
+  - gcc=12.2.0
   - pip:
     - numpy==1.23.4
     - xmltodict
     - requests
     - tqdm
     - psutil
+    - h5py
     - pyopenms==3.0.0
     - spectrum-utils==0.3.5
-    - falcon-ms
     - pymzml
     - numcodecs
-    - pyarrow
\ No newline at end of file
+    - pyarrow
+    - joblib
+    - git+https://github.com/XianghuWang-287/falcon.git@public-version
diff --git a/bin/scripts/convert_falcon_to_mscluster_format.py b/bin/scripts/convert_falcon_to_mscluster_format.py
new file mode 100755
index 0000000..aa2ccdd
--- /dev/null
+++ b/bin/scripts/convert_falcon_to_mscluster_format.py
@@ -0,0 +1,212 @@
+#!/usr/bin/python
+
+import sys
+import os
+import argparse
+import pandas as pd
+
+
+
+def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_clusterinfo, output_clustersummary, min_cluster_size=2):
+    """
+    Convert falcon output to mscluster format.
+    
+    Falcon format: cluster, filename, scan, precursor_mz, retention_time, new_batch
+    MSCluster format: #ClusterIdx, #Filename, #SpecIdx, #Scan, #ParentMass, #Charge, #RetTime, #PrecIntensity
+    
+    Note: If falcon doesn't have certain fields (like charge, intensity), we use default values (0)
+    instead of fetching from MGF files.
+    """
+    # Load falcon CSV
+    clusterinfo_df = pd.read_csv(falcon_csv, sep=',', comment='#')
+    
+    print(f"Loaded {len(clusterinfo_df)} rows from falcon CSV")
+    print(f"Columns: {clusterinfo_df.columns.tolist()}")
+    
+    
+    if 'identifier' in clusterinfo_df.columns:
+
+        clusterinfo_df["filename"] = clusterinfo_df["identifier"].apply(
+            lambda x: x.split(":")[2] + ".mzML" if len(x.split(":")) > 2 else (x.split(":")[-2] + ".mzML" if len(x.split(":")) > 1 else "unknown.mzML")
+        )
+        clusterinfo_df["scan"] = clusterinfo_df["identifier"].apply(
+            lambda x: int(x.split(":")[-1]) if x.split(":")[-1].isdigit() else 0
+        )
+    
+    # Ensure we have the required columns
+    required_cols = ['cluster', 'filename', 'scan', 'precursor_mz', 'retention_time']
+    missing_cols = [col for col in required_cols if col not in clusterinfo_df.columns]
+    
+    if missing_cols:
+        print(f"ERROR: Required columns not found in falcon CSV: {missing_cols}")
+        print(f"Available columns: {clusterinfo_df.columns.tolist()}")
+        print(f"First few rows:")
+        print(clusterinfo_df.head())
+        sys.exit(1)
+    
+    if min_cluster_size > 1:
+        clusterinfo_df = clusterinfo_df[clusterinfo_df['cluster'] != -1]
+    
+    # Convert to mscluster format
+    mscluster_rows = []
+    spec_idx_counter = 0
+    
+    for idx, row in clusterinfo_df.iterrows():
+        cluster_idx = int(row['cluster'])
+        
+        # Skip singletons if min_cluster_size > 1
+        if cluster_idx == -1 and min_cluster_size > 1:
+            continue
+        
+        # Handle cluster indexing: falcon uses 0-based, mscluster uses 1-based
+        # But we also need to handle -1 (singletons)
+        if cluster_idx == -1:
+            # Singletons: use a large number or handle separately
+            cluster_idx = 999999  # Use a large number for singletons
+        else:
+            cluster_idx = cluster_idx + 1  # Convert to 1-based
+        
+        filename = str(row['filename'])
+        scan = int(row['scan'])
+        precursor_mz = float(row['precursor_mz'])
+        retention_time = float(row['retention_time'])
+        
+        # Use default values for fields that falcon doesn't provide
+        # Don't fetch from MGF files - just use defaults
+        charge = 0
+        precursor_intensity = 0.0
+        
+   
+        if 'precursor_charge' in row and pd.notna(row['precursor_charge']):
+            try:
+                charge = int(row['precursor_charge'])
+            except (ValueError, TypeError):
+                charge = 0
+        elif 'charge' in row and pd.notna(row['charge']):
+            try:
+                charge = int(row['charge'])
+            except (ValueError, TypeError):
+                charge = 0
+        
+        if 'precursor_intensity' in row and pd.notna(row['precursor_intensity']):
+            try:
+                precursor_intensity = float(row['precursor_intensity'])
+            except (ValueError, TypeError):
+                precursor_intensity = 0.0
+        
+        # Convert retention time to seconds if it's in minutes
+        # Falcon typically outputs RT in minutes, mscluster uses seconds
+        if retention_time > 0 and retention_time < 1000:  # Likely in minutes
+            retention_time = retention_time * 60.0
+        
+        if not filename.startswith('input_spectra/'):
+            filename = f"input_spectra/{filename}"
+        
+        spec_idx = spec_idx_counter
+        spec_idx_counter += 1
+        
+        mscluster_row = {
+            '#ClusterIdx': cluster_idx,
+            '#Filename': filename,
+            '#SpecIdx': spec_idx,
+            '#Scan': scan,
+            '#ParentMass': precursor_mz,
+            '#Charge': charge,
+            '#RetTime': retention_time,
+            '#PrecIntensity': precursor_intensity
+        }
+        mscluster_rows.append(mscluster_row)
+    
+    # Create DataFrame
+    mscluster_df = pd.DataFrame(mscluster_rows)
+    
+    # Filter by min_cluster_size
+    if min_cluster_size > 1:
+        # Count spectra per cluster
+        cluster_counts = mscluster_df['#ClusterIdx'].value_counts()
+        valid_clusters = cluster_counts[cluster_counts >= min_cluster_size].index
+        mscluster_df = mscluster_df[mscluster_df['#ClusterIdx'].isin(valid_clusters)]
+    
+    # Create cluster summary
+    cluster_summary_rows = []
+    for cluster_idx in mscluster_df['#ClusterIdx'].unique():
+        cluster_data = mscluster_df[mscluster_df['#ClusterIdx'] == cluster_idx]
+        num_spectra = len(cluster_data)
+        
+        # Calculate mean RT (in minutes)
+        mean_rt = cluster_data['#RetTime'].mean() / 60.0
+        
+        # Calculate parent mass (mean of #ParentMass)
+        parent_mass = cluster_data['#ParentMass'].mean()
+        
+        # Calculate precursor mass (parent mass / charge, then take mean)
+        # If charge is 0, use parent mass directly
+        precursor_masses = []
+        for idx, row in cluster_data.iterrows():
+            if row['#Charge'] > 0:
+                precursor_masses.append(row['#ParentMass'] / row['#Charge'])
+            else:
+                precursor_masses.append(row['#ParentMass'])
+        precursor_mass = sum(precursor_masses) / len(precursor_masses) if precursor_masses else parent_mass
+        
+        # Calculate precursor charge (most common charge, or mean if no clear mode)
+        charges = cluster_data['#Charge'].values
+        charges_nonzero = charges[charges > 0]
+        if len(charges_nonzero) > 0:
+            # Use mode (most common charge) using pandas
+            charge_series = pd.Series(charges_nonzero)
+            mode_values = charge_series.mode()
+            if len(mode_values) > 0:
+                precursor_charge = int(mode_values.iloc[0])
+            else:
+                precursor_charge = int(charges_nonzero.mean())
+        else:
+            precursor_charge = 0
+        
+        # Calculate sum of precursor intensity
+        sum_precursor_intensity = cluster_data['#PrecIntensity'].sum()
+        
+        cluster_summary_row = {
+            'cluster index': cluster_idx,
+            'number of spectra': num_spectra,
+            'parent mass': parent_mass,
+            'precursor charge': precursor_charge,
+            'precursor mass': precursor_mass,
+            'sum(precursor intensity)': sum_precursor_intensity,
+            'RTMean': mean_rt
+        }
+        cluster_summary_rows.append(cluster_summary_row)
+    
+    cluster_summary_df = pd.DataFrame(cluster_summary_rows)
+    cluster_summary_df = cluster_summary_df.sort_values('cluster index')
+    
+    # Save outputs
+    mscluster_df.to_csv(output_clusterinfo, sep='\t', index=False)
+    cluster_summary_df.to_csv(output_clustersummary, sep='\t', index=False)
+    
+    print(f"Converted {len(mscluster_df)} spectra in {len(cluster_summary_df)} clusters")
+    print(f"Saved clusterinfo to {output_clusterinfo}")
+    print(f"Saved clustersummary to {output_clustersummary}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Convert Falcon output to MSCluster format')
+    parser.add_argument('falcon_csv', help='Falcon CSV output file')
+    parser.add_argument('input_spectra_folder', help='Input spectra folder (for reference, not used for fetching data)')
+    parser.add_argument('output_clusterinfo', help='Output clusterinfo.tsv file')
+    parser.add_argument('output_clustersummary', help='Output clustersummary.tsv file')
+    parser.add_argument('--min_cluster_size', type=int, default=2, help='Minimum cluster size')
+    
+    args = parser.parse_args()
+    
+    convert_falcon_to_mscluster_format(
+        args.falcon_csv,
+        args.input_spectra_folder,
+        args.output_clusterinfo,
+        args.output_clustersummary,
+        args.min_cluster_size
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/scripts/falcon_wrapper.py b/bin/scripts/falcon_wrapper.py
new file mode 100755
index 0000000..792fc76
--- /dev/null
+++ b/bin/scripts/falcon_wrapper.py
@@ -0,0 +1,273 @@
+#!/usr/bin/python
+
+import sys
+import os
+import glob
+import argparse
+import subprocess
+import pandas as pd
+import ming_spectrum_library
+
+def run_falcon(input_spectra_path, output_prefix="falcon", 
+               precursor_tol="20 ppm", fragment_tol=0.05,
+               min_mz_range=0, min_mz=0, max_mz=30000, eps=0.1):
+    """
+    Runs Falcon with specified parameters.
+    input_spectra_path can be a file or a folder.
+    """
+    # Check if input is a file or folder
+    if os.path.isfile(input_spectra_path):
+        # Single file input
+        all_spectrum_files = [input_spectra_path]
+    elif os.path.isdir(input_spectra_path):
+        # Folder input - list all spectrum files
+        all_mgf_files = glob.glob(os.path.join(input_spectra_path, "*.mgf"))
+        all_mzxml_files = glob.glob(os.path.join(input_spectra_path, "*.mzXML"))
+        all_mzml_files = glob.glob(os.path.join(input_spectra_path, "*.mzML"))
+        
+        all_spectrum_files = all_mgf_files + all_mzxml_files + all_mzml_files
+        
+        if len(all_spectrum_files) == 0:
+            print(f"ERROR: No spectrum files found in {input_spectra_path}")
+            sys.exit(1)
+        
+        # Sort these filenames
+        all_spectrum_files.sort()
+    else:
+        print(f"ERROR: Input path does not exist: {input_spectra_path}")
+        sys.exit(1)
+    
+    # Create pattern for falcon (it accepts wildcards or file list)
+    if len(all_spectrum_files) == 1:
+        mzml_pattern = all_spectrum_files[0]
+    else:
+        # Falcon can accept multiple files, we'll pass them as space-separated
+        mzml_pattern = " ".join(all_spectrum_files)
+    
+    # Parse precursor_tol - falcon expects two arguments
+    # Format can be "2.0" (just number) or "20 ppm" (value and unit)
+    # Falcon needs two arguments, so we need to handle both cases
+    precursor_tol_str = str(precursor_tol).strip()
+    if " " in precursor_tol_str:
+        # Already has format like "20 ppm" or "2.0 Da" - split into two args
+        parts = precursor_tol_str.split(None, 1)  # Split on first space
+        if len(parts) == 2:
+            precursor_tol_args = f"{parts[0]} {parts[1]}"
+        else:
+            # Fallback: use value twice
+            precursor_tol_args = f"{parts[0]} {parts[0]}"
+    else:
+        # Just a number, assume Da and use twice (min and max tolerance)
+        try:
+            tol_value = float(precursor_tol_str)
+            precursor_tol_args = f"{tol_value} {tol_value}"
+        except ValueError:
+            # If can't parse, use default
+            precursor_tol_args = "2.0 2.0"
+    
+    command = (
+        f"falcon {mzml_pattern} {output_prefix} "
+        f"--export_representatives "
+        f"--precursor_tol {precursor_tol_args} "
+        f"--fragment_tol {fragment_tol} "
+        f"--min_mz_range {min_mz_range} "
+        f"--min_mz {min_mz} --max_mz {max_mz} "
+        f"--eps {eps} "
+        f"--hash_len 400 "
+        f"--n_neighbors_ann 64 "
+        f"--n_probe 16 "
+        f"--batch_size 32768 "
+    )
+    print(f"[run_falcon] Running: {command}")
+    process = subprocess.Popen(command, shell=True)
+    retcode = process.wait()
+    if retcode != 0:
+        print(f"ERROR: Falcon failed with exit code {retcode}")
+        sys.exit(1)
+
+
+def main():
+    # Parse arguments
+    parser = argparse.ArgumentParser(description='Falcon Clustering Wrapper')
+    parser.add_argument('input_spectra_folder', help='Input Spectra Folder')
+    parser.add_argument('output_spectra_folder', help='Output Spectra Folder')
+    parser.add_argument('final_output_folder', help='final_output_folder')
+    
+    parser.add_argument('--min_cluster_size', default="2", help='min_cluster_size (not used by falcon directly)')
+    
+    parser.add_argument('--pm_tolerance', default="20 ppm", help='pm_tolerance (precursor tolerance)')
+    parser.add_argument('--fragment_tolerance', default="0.05", help='fragment_tolerance')
+    
+    # Filters (not all are used by falcon)
+    parser.add_argument('--min_peak_intensity', default="0.0", help='min_peak_intensity (not used by falcon)')
+    parser.add_argument('--window_filter', default="1", help='window_filter (not used by falcon)')
+    parser.add_argument('--precursor_filter', default="1", help='precursor_filter (not used by falcon)')
+    
+    # Falcon-specific parameters
+    parser.add_argument('--eps', default="0.1", help='Falcon eps parameter')
+    parser.add_argument('--min_mz', default="0", help='Falcon min_mz parameter')
+    parser.add_argument('--max_mz', default="30000", help='Falcon max_mz parameter')
+    
+    args = parser.parse_args()
+    
+    # Check if input is a file or folder (Nextflow may pass a file)
+    input_spectra_path = args.input_spectra_folder
+    if os.path.isfile(args.input_spectra_folder):
+        # If it's a single file, we need to use its directory
+        input_spectra_path = os.path.dirname(args.input_spectra_folder)
+        if not input_spectra_path:
+            input_spectra_path = "."
+    
+    # Running falcon
+    output_prefix = "falcon"
+    run_falcon(input_spectra_path, output_prefix,
+               precursor_tol=args.pm_tolerance,
+               fragment_tol=float(args.fragment_tolerance),
+               min_mz_range=0,
+               min_mz=int(args.min_mz),
+               max_mz=int(args.max_mz),
+               eps=float(args.eps))
+    
+    # Falcon outputs:
+    # - falcon.csv (cluster assignments)
+    # - falcon.mgf (representative spectra)
+    
+    falcon_csv = f"{output_prefix}.csv"
+    falcon_mgf = f"{output_prefix}.mgf"
+    
+    if not os.path.exists(falcon_csv):
+        print(f"ERROR: Falcon output {falcon_csv} not found")
+        sys.exit(1)
+    
+    if not os.path.exists(falcon_mgf):
+        print(f"ERROR: Falcon output {falcon_mgf} not found")
+        sys.exit(1)
+    
+    # Convert falcon output to mscluster format first
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    convert_script = os.path.join(script_dir, "convert_falcon_to_mscluster_format.py")
+    
+    clusterinfo_file = os.path.join(args.final_output_folder, "clusterinfo.tsv")
+    clustersummary_file = os.path.join(args.final_output_folder, "clustersummary.tsv")
+    
+    # For conversion, we need the original input path to get spectrum info
+    convert_input_path = input_spectra_path
+    
+    convert_cmd = (
+        f"python {convert_script} "
+        f"{falcon_csv} "
+        f"{convert_input_path} "
+        f"{clusterinfo_file} "
+        f"{clustersummary_file} "
+        f"--min_cluster_size {args.min_cluster_size}"
+    )
+    
+    print(f"Converting falcon output to mscluster format...")
+    print(f"Running: {convert_cmd}")
+    ret_code = os.system(convert_cmd)
+    
+    if ret_code != 0:
+        print(f"ERROR: Conversion failed with exit code {ret_code}")
+        sys.exit(1)
+    
+    # Now update MGF file with correct scan numbers based on clusterinfo.tsv
+    # The scan number in MGF should match the #ClusterIdx in clusterinfo.tsv
+    # Read clusterinfo to get cluster indices
+    clusterinfo_df = pd.read_csv(clusterinfo_file, sep='\t')
+    
+    # Get unique cluster indices (these will be the scan numbers in MGF)
+    unique_clusters = sorted(clusterinfo_df['#ClusterIdx'].unique())
+    
+    # Read falcon MGF file - parse manually to access cluster field
+    output_mgf_filename = os.path.join(args.final_output_folder, "specs_ms.mgf")
+    os.makedirs(args.final_output_folder, exist_ok=True)
+    
+    # Parse falcon MGF to get cluster information and update scan numbers
+    mgf_spectra = []
+    with open(falcon_mgf, 'r') as f:
+        spec = None
+        for line in f:
+            line = line.strip()
+            if line == 'BEGIN IONS':
+                spec = {'peaks': [], 'headers': {}}
+            elif line == 'END IONS':
+                if spec is not None:
+                    mgf_spectra.append(spec)
+                spec = None
+            elif spec is not None:
+                if '=' in line:
+                    key, val = line.split('=', 1)
+                    spec['headers'][key.upper()] = val
+                else:
+                    # Peak data
+                    parts = line.split()
+                    if len(parts) == 2:
+                        try:
+                            mz, intensity = float(parts[0]), float(parts[1])
+                            spec['peaks'].append((mz, intensity))
+                        except:
+                            pass
+    
+    # Create mapping: falcon cluster (0-based) -> mscluster cluster index (1-based, which is #ClusterIdx)
+    # falcon cluster IDs in MGF are 0-based, but we converted to 1-based in clusterinfo
+    cluster_to_scan = {}
+    for cluster_idx in unique_clusters:
+        # falcon uses 0-based, mscluster uses 1-based
+        falcon_cluster = cluster_idx - 1
+        cluster_to_scan[falcon_cluster] = cluster_idx
+    
+    # Write MGF with correct scan numbers
+    # Only include spectra that correspond to clusters in clusterinfo.tsv
+    # This ensures MGF and clusterinfo.tsv are consistent
+    skipped_count = 0
+    with open(output_mgf_filename, 'w') as out_mgf:
+        for spec in mgf_spectra:
+            # Get cluster ID from falcon MGF
+            falcon_cluster = -1
+            if 'CLUSTER' in spec['headers']:
+                try:
+                    falcon_cluster = int(spec['headers']['CLUSTER'])
+                except (ValueError, TypeError):
+                    pass
+            
+            # Only include spectra that are in clusterinfo.tsv
+            # Skip clusters that were filtered out by min_cluster_size
+            if falcon_cluster in cluster_to_scan:
+                scan_number = cluster_to_scan[falcon_cluster]
+                
+                # Write MGF entry with correct scan number
+                out_mgf.write("BEGIN IONS\n")
+                
+                # Write headers, updating SCANS= line
+                for key, val in spec['headers'].items():
+                    if key == 'SCANS':
+                        out_mgf.write(f"SCANS={scan_number}\n")
+                    else:
+                        out_mgf.write(f"{key}={val}\n")
+                
+                # Add SCANS if it wasn't in the original
+                if 'SCANS' not in spec['headers']:
+                    out_mgf.write(f"SCANS={scan_number}\n")
+                
+                # Write peaks
+                for mz, intensity in spec['peaks']:
+                    out_mgf.write(f"{mz} {intensity}\n")
+                
+                out_mgf.write("END IONS\n\n")
+            else:
+                # Skip clusters that are not in clusterinfo.tsv (filtered out by min_cluster_size)
+                skipped_count += 1
+                if falcon_cluster != -1:  # Only warn for non-singleton clusters
+                    print(f"INFO: Skipping falcon cluster {falcon_cluster} (filtered out by min_cluster_size)")
+    
+    if skipped_count > 0:
+        print(f"INFO: Skipped {skipped_count} spectra that were filtered out by min_cluster_size")
+    
+    print(f"Falcon clustering completed. Output files:")
+    print(f"  - MGF: {output_mgf_filename}")
+    print(f"  - ClusterInfo: {clusterinfo_file}")
+    print(f"  - ClusterSummary: {clustersummary_file}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nf_workflow.nf b/nf_workflow.nf
index 7b47415..fd74c3e 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -13,14 +13,20 @@ params.metadata_per_file_grouping = "No" // Yes means that each file can be its
 params.metadata_filename = "data/metadata.tsv"
 
 // Clustering Parameters
+params.clustering_tool = "mscluster" // or "falcon"
 params.min_cluster_size = "2"
-params.cluster_method = "MS-Cluster" // can be MS-Cluster or Falcon
-params.eps = "0.01" // eps for falcon
 
 // Tolerance Parameters
 params.pm_tolerance = "2.0"
 params.fragment_tolerance = "0.5"
 
+// Falcon-specific parameters
+params.falcon_pm_tolerance = "20 ppm"
+params.falcon_fragment_tolerance = "0.05"
+params.falcon_eps = "0.1"
+params.falcon_min_mz = "0"
+params.falcon_max_mz = "30000"
+
 // Filtering
 params.min_peak_intensity = "0.0"
 params.window_filter = "1"
@@ -116,52 +122,37 @@ process mscluster {
 }
 
 process falcon {
-    publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
+    publishDir "$params.publishdir/nf_output", mode: 'copy'
 
     conda "$TOOL_FOLDER/conda_env_falcon.yml"
+    
+    // This is necessary because the glibc libraries are not always used in the conda environment, and defaults to the system which could be old
+    beforeScript 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib'
 
     input:
-    val inputSpectra
+    file inputSpectra
     val ready
 
     output:
-    file 'falcon.mgf'
-    file 'falcon.csv'
+    file 'clustering/specs_ms.mgf'
+    file 'clustering/clusterinfo.tsv'
+    file 'clustering/clustersummary.tsv'
 
     """
     mkdir clustering
-    falcon  \
-    ${inputSpectra} \
-    falcon --export_representatives \
-    --precursor_tol 0.05 Da \
-    --fragment_tol 0.05 \
-    --min_mz_range 0 \
-    --min_mz 0 \
-    --max_mz 2000 \
-    --min_samples $params.min_cluster_size \
-    --eps $params.eps --work_dir clustering \
-    --overwrite
-    """
-}
-
-process summarize_falcon {
-    publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
-
-    conda "$TOOL_FOLDER/conda_env.yml"
-
-    input:
-    file input_csv
-    file input_mgf
-
-    output:
-    file 'clusterinfo.tsv'
-    file 'clustersummary.tsv'
-    file 'specs_ms.mgf'
-
-    """
-    python $TOOL_FOLDER/scripts/summarize_results_falcon.py \
-    ${input_csv} \
-    ${input_mgf}
+    python $TOOL_FOLDER/scripts/falcon_wrapper.py \
+    $inputSpectra \
+    spectra \
+    clustering \
+    --min_cluster_size $params.min_cluster_size \
+    --pm_tolerance "$params.falcon_pm_tolerance" \
+    --fragment_tolerance $params.falcon_fragment_tolerance \
+    --min_peak_intensity $params.min_peak_intensity \
+    --window_filter $params.window_filter \
+    --precursor_filter $params.precursor_filter \
+    --eps $params.falcon_eps \
+    --min_mz $params.falcon_min_mz \
+    --max_mz $params.falcon_max_mz
     """
 }
 
@@ -611,8 +602,7 @@ process PrepareForModiFinder{
 }
 
 workflow {
-
-// Preps input spectrum files
+    // Preps input spectrum files
     input_spectra_ch = Channel.fromPath(params.input_spectra)
 
     // Downloads input data and lists privtae spectra
@@ -622,14 +612,18 @@ workflow {
     // File summaries
     filesummary(input_spectra_ch, _download_ready)
 
+    // Note: For subsequent processes (library search, networking), they use pm_tolerance and fragment_tolerance
+    // These are set to mscluster defaults. If using falcon, users should be aware that downstream processes
+    // will use the mscluster tolerance values unless they also update those parameters.
+    // This is acceptable because downstream processes work on clustered spectra, and the tolerance
+    // for library search and networking can be different from clustering tolerance.
+
     // Clustering
-    if(params.cluster_method == "MS-Cluster"){
-        (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
+    if(params.clustering_tool == "falcon"){
+        (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = falcon(input_spectra_ch, _download_ready)
     }
-    else if (params.cluster_method == "Falcon"){
-        input_falcon_spectra_ch = params.input_spectra + "/*"
-        (clustered_spectra_falcon_ch, falcon_tsv_ch) = falcon(input_falcon_spectra_ch, _download_ready)
-        (clusterinfo_ch, clustersummary_ch, clustered_spectra_intermediate_ch) = summarize_falcon(falcon_tsv_ch, clustered_spectra_falcon_ch)
+    else{
+        (clustered_spectra_intermediate_ch, clusterinfo_ch, clustersummary_ch) = mscluster(input_spectra_ch, _download_ready)
     }
 
     if(params.massql_filter != "None"){
diff --git a/workflowinput.yaml b/workflowinput.yaml
index 86ee6a3..6164359 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -88,6 +88,17 @@ parameterlist:
     - displayname: Clustering Parameters
       paramtype: section
     
+    - displayname: Clustering Tool
+      paramtype: select
+      nf_paramname: clustering_tool
+      formvalue: "mscluster"
+      options:
+        - value: "mscluster"
+          display: "MSCluster"
+        - value: "falcon"
+          display: "Falcon"
+      tooltip: "Select the clustering tool to use for spectral clustering"
+    
     - displayname: Minimum Cluster Size (Or Disable Clustering)
       paramtype: select
       nf_paramname: min_cluster_size
@@ -105,6 +116,68 @@ parameterlist:
           display: "4"
         - value: "5"
           display: "5"
+    
+    - displayname: Falcon Parameters
+      paramtype: section
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
+    
+    - displayname: Precursor Ion Tolerance
+      paramtype: text
+      nf_paramname: falcon_pm_tolerance
+      formplaceholder: Enter the pm_tolerance (e.g., 20 ppm or 2.0 Da)
+      formvalue: "20 ppm"
+      tooltip: "Precursor tolerance with unit (e.g., 20 ppm or 2.0 Da)"
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
+    
+    - displayname: Fragment Ion Tolerance
+      paramtype: text
+      nf_paramname: falcon_fragment_tolerance
+      formplaceholder: Enter the fragment_tolerance
+      formvalue: "0.05"
+      tooltip: "Fragment tolerance (Da)"
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
+    
+    - displayname: Falcon Epsilon (eps)
+      paramtype: text
+      nf_paramname: falcon_eps
+      formplaceholder: Enter the falcon_eps
+      formvalue: "0.1"
+      tooltip: "Falcon clustering epsilon parameter (only used when clustering_tool is falcon)"
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
+    
+    - displayname: Falcon Min MZ
+      paramtype: text
+      nf_paramname: falcon_min_mz
+      formplaceholder: Enter the falcon_min_mz
+      formvalue: "0"
+      tooltip: "Falcon minimum m/z value (only used when clustering_tool is falcon)"
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
+    
+    - displayname: Falcon Max MZ
+      paramtype: text
+      nf_paramname: falcon_max_mz
+      formplaceholder: Enter the falcon_max_mz
+      formvalue: "30000"
+      tooltip: "Falcon maximum m/z value (only used when clustering_tool is falcon)"
+      showif:
+        - condition:
+          - key: clustering_tool
+            value: "falcon"
 
     - displayname: Clustering Tool
       paramtype: select

From 10cbf66ddf641ce706c84c5db9986ebbdf4e3a2c Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Tue, 13 Jan 2026 14:12:21 -0800
Subject: [PATCH 12/21] bugfix

---
 bin/conda_env_falcon.yml |  2 +-
 workflowinput.yaml       | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/bin/conda_env_falcon.yml b/bin/conda_env_falcon.yml
index d816410..3c27485 100644
--- a/bin/conda_env_falcon.yml
+++ b/bin/conda_env_falcon.yml
@@ -23,4 +23,4 @@ dependencies:
     - numcodecs
     - pyarrow
     - joblib
-    - git+https://github.com/XianghuWang-287/falcon.git@public-version
+    - falcon-ms
diff --git a/workflowinput.yaml b/workflowinput.yaml
index 6164359..fb869a5 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -179,16 +179,6 @@ parameterlist:
           - key: clustering_tool
             value: "falcon"
 
-    - displayname: Clustering Tool
-      paramtype: select
-      nf_paramname: cluster_method
-      formvalue: "MS-Cluster"
-      options:
-        - value: "MS-Cluster"
-          display: "MS-Cluster"
-        - value: "Falcon"
-          display: "Falcon"
-
     - displayname: Advanced Filtering Parameters
       paramtype: section
 

From 71457a600f543ad375c98c9686cdcd84c0b5ac3a Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Tue, 13 Jan 2026 14:36:38 -0800
Subject: [PATCH 13/21] update falcon env

---
 bin/conda_env_falcon.yml | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/bin/conda_env_falcon.yml b/bin/conda_env_falcon.yml
index 3c27485..1bb31cf 100644
--- a/bin/conda_env_falcon.yml
+++ b/bin/conda_env_falcon.yml
@@ -5,22 +5,17 @@ channels:
 channel_priority: strict
 dependencies:
   - python=3.10
-  - pandas=1.5.0
+  - pandas=2.2.0
   - pyteomics=4.7.1
-  - libglib
-  - libstdcxx-ng
-  - gcc=12.2.0
   - pip:
     - numpy==1.23.4
     - xmltodict
     - requests
     - tqdm
     - psutil
-    - h5py
     - pyopenms==3.0.0
     - spectrum-utils==0.3.5
+    - falcon-ms
     - pymzml
     - numcodecs
     - pyarrow
-    - joblib
-    - falcon-ms

From 16e6704dd099b76bff0f30b951eeed7eab697817 Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Fri, 23 Jan 2026 13:30:51 -0800
Subject: [PATCH 14/21] bug fix for cluster id mapping

---
 .../convert_falcon_to_mscluster_format.py     | 42 ++++++++++-
 bin/scripts/falcon_wrapper.py                 | 71 +++++++++++++------
 2 files changed, 90 insertions(+), 23 deletions(-)

diff --git a/bin/scripts/convert_falcon_to_mscluster_format.py b/bin/scripts/convert_falcon_to_mscluster_format.py
index aa2ccdd..e1310f7 100755
--- a/bin/scripts/convert_falcon_to_mscluster_format.py
+++ b/bin/scripts/convert_falcon_to_mscluster_format.py
@@ -120,6 +120,16 @@ def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_
     # Create DataFrame
     mscluster_df = pd.DataFrame(mscluster_rows)
     
+    # IMPORTANT: Before filtering, save the original falcon cluster ID (0-based) for each row
+    # This will help us match falcon MGF clusters to clusterinfo clusters later
+    # The original falcon cluster ID is stored in the 'cluster' column from falcon.csv
+    # We need to track: original_falcon_cluster (0-based) -> sequential_index (1-based)
+    
+    # First, add a column to track original falcon cluster (0-based) before filtering
+    # We'll need to reconstruct this from the cluster_idx we converted
+    # cluster_idx was converted from falcon's 0-based to 1-based, so original = cluster_idx - 1
+    mscluster_df['_original_falcon_cluster'] = mscluster_df['#ClusterIdx'] - 1
+    
     # Filter by min_cluster_size
     if min_cluster_size > 1:
         # Count spectra per cluster
@@ -127,9 +137,29 @@ def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_
         valid_clusters = cluster_counts[cluster_counts >= min_cluster_size].index
         mscluster_df = mscluster_df[mscluster_df['#ClusterIdx'].isin(valid_clusters)]
     
-    # Create cluster summary
+    # IMPORTANT: Remap cluster indices to sequential (1, 2, 3, ...) for consistency
+    # This ensures clusterinfo.tsv, clustersummary.tsv, and MGF SCANS all use the same sequential indices
+    # This is necessary for compatibility with ExecMolecularParallelPairs which uses index-based CLUSTERID
+    original_clusters = sorted(mscluster_df['#ClusterIdx'].unique())
+    cluster_remap = {orig: new for new, orig in enumerate(original_clusters, start=1)}
+    
+    # Create mapping: original falcon cluster (0-based) -> sequential index (1-based)
+    # This mapping will be used in falcon_wrapper.py to match MGF clusters
+    falcon_cluster_to_sequential = {}
+    for orig_cluster_idx in original_clusters:
+        original_falcon_cluster = orig_cluster_idx - 1  # Convert back to 0-based
+        sequential_idx = cluster_remap[orig_cluster_idx]
+        falcon_cluster_to_sequential[original_falcon_cluster] = sequential_idx
+    
+    # Remap #ClusterIdx in mscluster_df
+    mscluster_df['#ClusterIdx'] = mscluster_df['#ClusterIdx'].map(cluster_remap)
+    
+    # Remove the temporary column
+    mscluster_df = mscluster_df.drop(columns=['_original_falcon_cluster'])
+    
+    # Create cluster summary with remapped indices
     cluster_summary_rows = []
-    for cluster_idx in mscluster_df['#ClusterIdx'].unique():
+    for cluster_idx in sorted(mscluster_df['#ClusterIdx'].unique()):
         cluster_data = mscluster_df[mscluster_df['#ClusterIdx'] == cluster_idx]
         num_spectra = len(cluster_data)
         
@@ -167,7 +197,7 @@ def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_
         sum_precursor_intensity = cluster_data['#PrecIntensity'].sum()
         
         cluster_summary_row = {
-            'cluster index': cluster_idx,
+            'cluster index': cluster_idx,  # Already remapped to sequential
             'number of spectra': num_spectra,
             'parent mass': parent_mass,
             'precursor charge': precursor_charge,
@@ -180,6 +210,11 @@ def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_
     cluster_summary_df = pd.DataFrame(cluster_summary_rows)
     cluster_summary_df = cluster_summary_df.sort_values('cluster index')
     
+    # Ensure cluster index is string type to match network graph node types
+    # Network graph nodes from pairs file (CLUSTERID1/CLUSTERID2) are typically strings
+    # This ensures type matching when add_clusterinfo_summary_to_graph checks "if cluster_index in G"
+    cluster_summary_df['cluster index'] = cluster_summary_df['cluster index'].astype(str)
+    
     # Save outputs
     mscluster_df.to_csv(output_clusterinfo, sep='\t', index=False)
     cluster_summary_df.to_csv(output_clustersummary, sep='\t', index=False)
@@ -187,6 +222,7 @@ def convert_falcon_to_mscluster_format(falcon_csv, input_spectra_folder, output_
     print(f"Converted {len(mscluster_df)} spectra in {len(cluster_summary_df)} clusters")
     print(f"Saved clusterinfo to {output_clusterinfo}")
     print(f"Saved clustersummary to {output_clustersummary}")
+    print(f"Note: Cluster indices have been remapped to sequential (1, 2, 3, ...) for consistency")
 
 
 def main():
diff --git a/bin/scripts/falcon_wrapper.py b/bin/scripts/falcon_wrapper.py
index 792fc76..f984a71 100755
--- a/bin/scripts/falcon_wrapper.py
+++ b/bin/scripts/falcon_wrapper.py
@@ -171,11 +171,12 @@ def main():
         sys.exit(1)
     
     # Now update MGF file with correct scan numbers based on clusterinfo.tsv
-    # The scan number in MGF should match the #ClusterIdx in clusterinfo.tsv
+    # IMPORTANT: clusterinfo.tsv now uses sequential indices (1, 2, 3, ...)
+    # The scan number in MGF should match the #ClusterIdx in clusterinfo.tsv (which is already sequential)
     # Read clusterinfo to get cluster indices
     clusterinfo_df = pd.read_csv(clusterinfo_file, sep='\t')
     
-    # Get unique cluster indices (these will be the scan numbers in MGF)
+    # Get unique cluster indices (these are already sequential: 1, 2, 3, ...)
     unique_clusters = sorted(clusterinfo_df['#ClusterIdx'].unique())
     
     # Read falcon MGF file - parse manually to access cluster field
@@ -208,18 +209,42 @@ def main():
                         except:
                             pass
     
-    # Create mapping: falcon cluster (0-based) -> mscluster cluster index (1-based, which is #ClusterIdx)
-    # falcon cluster IDs in MGF are 0-based, but we converted to 1-based in clusterinfo
-    cluster_to_scan = {}
-    for cluster_idx in unique_clusters:
-        # falcon uses 0-based, mscluster uses 1-based
-        falcon_cluster = cluster_idx - 1
-        cluster_to_scan[falcon_cluster] = cluster_idx
-    
-    # Write MGF with correct scan numbers
-    # Only include spectra that correspond to clusters in clusterinfo.tsv
-    # This ensures MGF and clusterinfo.tsv are consistent
+    # IMPORTANT: clusterinfo.tsv now uses sequential indices (1, 2, 3, ...)
+    # We need to map falcon clusters (0-based) to these sequential indices
+    # Rebuild the mapping by reading falcon.csv and matching with clusterinfo.tsv
+    # This avoids needing a separate JSON file that might not be passed through Nextflow
+    
+    # Read falcon.csv to get original falcon cluster assignments
+    falcon_cluster_to_sequential = {}
+    if os.path.exists(falcon_csv):
+        falcon_df = pd.read_csv(falcon_csv, sep=',', comment='#')
+        
+        # Filter by min_cluster_size (same as in convert_falcon_to_mscluster_format.py)
+        if int(args.min_cluster_size) > 1:
+            falcon_df = falcon_df[falcon_df['cluster'] != -1]
+        
+        # Group by original falcon cluster (0-based)
+        # The sequential index in clusterinfo corresponds to the order after filtering
+        # We'll match by grouping falcon clusters and assigning sequential indices
+        original_clusters = sorted(falcon_df['cluster'].unique())
+        sequential_idx = 1
+        for orig_falcon_cluster in original_clusters:
+            # Count spectra in this cluster
+            cluster_spectra = falcon_df[falcon_df['cluster'] == orig_falcon_cluster]
+            if len(cluster_spectra) >= int(args.min_cluster_size):
+                falcon_cluster_to_sequential[orig_falcon_cluster] = sequential_idx
+                sequential_idx += 1
+        
+        print(f"Rebuilt falcon cluster mapping: {len(falcon_cluster_to_sequential)} clusters")
+    else:
+        print(f"WARNING: falcon.csv not found at {falcon_csv}, cannot rebuild mapping")
+        falcon_cluster_to_sequential = {}
+    
+    # Write MGF with sequential scan numbers matching clusterinfo.tsv
+    # Since clusterinfo.tsv already has sequential indices, we'll use them directly
     skipped_count = 0
+    processed_clusters = set()  # Track which sequential cluster indices we've processed
+    
     with open(output_mgf_filename, 'w') as out_mgf:
         for spec in mgf_spectra:
             # Get cluster ID from falcon MGF
@@ -230,12 +255,12 @@ def main():
                 except (ValueError, TypeError):
                     pass
             
-            # Only include spectra that are in clusterinfo.tsv
-            # Skip clusters that were filtered out by min_cluster_size
-            if falcon_cluster in cluster_to_scan:
-                scan_number = cluster_to_scan[falcon_cluster]
+            # Map falcon cluster (0-based) to sequential index (1-based)
+            if falcon_cluster in falcon_cluster_to_sequential:
+                sequential_idx = falcon_cluster_to_sequential[falcon_cluster]
+                scan_number = sequential_idx  # Use sequential index as SCANS
                 
-                # Write MGF entry with correct scan number
+                # Write MGF entry with sequential scan number
                 out_mgf.write("BEGIN IONS\n")
                 
                 # Write headers, updating SCANS= line
@@ -254,11 +279,17 @@ def main():
                     out_mgf.write(f"{mz} {intensity}\n")
                 
                 out_mgf.write("END IONS\n\n")
+                processed_clusters.add(sequential_idx)
             else:
-                # Skip clusters that are not in clusterinfo.tsv (filtered out by min_cluster_size)
+                # Skip clusters that are not in the mapping (filtered out by min_cluster_size)
                 skipped_count += 1
                 if falcon_cluster != -1:  # Only warn for non-singleton clusters
-                    print(f"INFO: Skipping falcon cluster {falcon_cluster} (filtered out by min_cluster_size)")
+                    print(f"INFO: Skipping falcon cluster {falcon_cluster} (filtered out by min_cluster_size or not in mapping)")
+    
+    if skipped_count > 0:
+        print(f"INFO: Skipped {skipped_count} spectra that were filtered out by min_cluster_size")
+    
+    print(f"Wrote {len(processed_clusters)} spectra to MGF file with sequential SCANS (1, 2, 3, ...)")
     
     if skipped_count > 0:
         print(f"INFO: Skipped {skipped_count} spectra that were filtered out by min_cluster_size")

From c1df14fd96b51c94d9c3bf648a73f0807254aa11 Mon Sep 17 00:00:00 2001
From: XianghuWang-287 <wangxianghu1019@126.com>
Date: Fri, 23 Jan 2026 13:51:59 -0800
Subject: [PATCH 15/21] update workflow version

---
 workflowinput.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflowinput.yaml b/workflowinput.yaml
index fb869a5..523543c 100644
--- a/workflowinput.yaml
+++ b/workflowinput.yaml
@@ -1,7 +1,7 @@
 workflowname: classical_networking_workflow
 workflowdescription: This is Classical Molecular Networking for GNPS2
 workflowlongdescription: This is Classical Molecular Networking for GNPS2
-workflowversion: "2026.01.13"
+workflowversion: "2026.01.23"
 workflowfile: nf_workflow.nf
 workflowautohide: false
 adminonly: false

From 075a972080f320f9e4742ee888a7bb8a470714c2 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Fri, 6 Feb 2026 12:10:13 -0800
Subject: [PATCH 16/21] using library summary with a module

---
 .gitmodules         |  3 +++
 bin/NextflowModules |  1 +
 nf_workflow.nf      | 43 ++++++++++++++++++++-----------------------
 3 files changed, 24 insertions(+), 23 deletions(-)
 create mode 160000 bin/NextflowModules

diff --git a/.gitmodules b/.gitmodules
index b0a66e4..e4e3b30 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
 [submodule "bin/scripts/downloadpublicdata"]
 	path = bin/scripts/downloadpublicdata
 	url = https://github.com/Wang-Bioinformatics-Lab/downloadpublicdata.git
+[submodule "bin/NextflowModules"]
+	path = bin/NextflowModules
+	url = https://github.com/Wang-Bioinformatics-Lab/NextflowModules.git
diff --git a/bin/NextflowModules b/bin/NextflowModules
new file mode 160000
index 0000000..ad68182
--- /dev/null
+++ b/bin/NextflowModules
@@ -0,0 +1 @@
+Subproject commit ad681820f3795f45c772946b9b3b6da1baac580b
diff --git a/nf_workflow.nf b/nf_workflow.nf
index fd74c3e..b849129 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -74,9 +74,28 @@ params.cache_directory = "data/cache"
 
 params.publishdir = "$baseDir"
 TOOL_FOLDER = "$baseDir/bin"
+MODULES_FOLDER = "$TOOL_FOLDER/NextflowModules"
+
+// COMPATIBILITY NOTE: The following might be necessary if this workflow is being deployed in a slightly different environemnt
+// checking if outdir is defined,
+// if so, then set publishdir to outdir
+if (params.outdir) {
+    _publishdir = params.outdir
+}
+else{
+    _publishdir = params.publishdir
+}
+
+// Augmenting with nf_output
+_publishdir = "${_publishdir}/nf_output"
+
+
+// A lot of useful modules are already implemented and added to the nextflow modules, you can import them to use
+// the publishdir is a key word that we're using around all our modules to control where the output files will be saved
+include {summaryLibrary} from "$MODULES_FOLDER/nf_library_search_modules.nf"
 
 process filesummary {
-    publishDir "$params.publishdir/nf_output", mode: 'copy'
+    publishDir "$_publishdir", mode: 'copy'
 
     conda "$TOOL_FOLDER/conda_env.yml"
 
@@ -533,28 +552,6 @@ process prepInputFiles {
     """
 }
 
-process summaryLibrary {
-    publishDir "$params.publishdir/nf_output", mode: 'copy'
-
-    maxForks 8
-
-    cache 'lenient'
-
-    conda "$TOOL_FOLDER/conda_env.yml"
-
-    input:
-    path library_file
-
-    output:
-    path '*.tsv' optional true
-
-    """
-    python $TOOL_FOLDER/scripts/library_summary.py \
-    $library_file \
-    ${library_file}.tsv
-    """
-}
-
 process createFeatureTable {
     publishDir "$params.publishdir/nf_output/clustering", mode: 'copy'
 

From f315e7c98bb0b9bc3bbf5ccc0a9cda0f1652cfa2 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Fri, 6 Feb 2026 12:10:59 -0800
Subject: [PATCH 17/21] saving file

---
 nf_workflow.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nf_workflow.nf b/nf_workflow.nf
index b849129..2095410 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -641,7 +641,7 @@ workflow {
     library_summary_ch = summaryLibrary(libraries_ch)
 
     // Merging all these tsv files from library_summary_ch within nextflow
-    library_summary_merged_ch = library_summary_ch.collectFile(name: "library_summary.tsv", keepHeader: true)
+    library_summary_ch.collectFile(name: 'librarysummary.tsv', keepHeader: true, storeDir: _publishdir + "/librarysummary")
     library_summary_merged_ch = library_summary_merged_ch.ifEmpty(file("NO_FILE"))
 
     gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch)

From ae66e586c408fa5c987bd7088140b23d7a2e528c Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 11 Feb 2026 21:14:07 -0800
Subject: [PATCH 18/21] place holder for library

---
 workflowdisplay.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml
index 6af7541..434f0e3 100644
--- a/workflowdisplay.yaml
+++ b/workflowdisplay.yaml
@@ -100,8 +100,13 @@ Views:
             -   title: "Smiles"
                 data: Smiles    
         columnDefs: '[ {"targets": 0,"data": null,"render": function ( data, type, row, meta ) {
+        if (row["SpectrumID"] && row["SpectrumID"].includes("CCMSLIB")) {
+            return `
+                <a target="_blank" href="https://metabolomics-usi.gnps2.org/dashinterface/?usi1=mzspec:GNPS2:TASK-${task}-nf_output/clustering/specs_ms.mgf:scan:${row["#Scan#"]}&usi2=mzspec:GNPS:GNPS-LIBRARY:accession:${row["SpectrumID"]}">View Mirror</a>
+            `;
+        }
         return `
-            <a target="_blank" href="https://metabolomics-usi.gnps2.org/dashinterface/?usi1=mzspec:GNPS2:TASK-${task}-nf_output/clustering/specs_ms.mgf:scan:${row["#Scan#"]}&usi2=mzspec:GNPS:GNPS-LIBRARY:accession:${row["SpectrumID"]}">View Mirror</a>
+            <span>CCMSLIB view coming soon</span>
         `;}},
         {"targets": 10,"data": null,"render": function ( data, type, row, meta ) {
         return `

From 5f03570268b9a5df7b7d63efe9c2cb0e3b0242c1 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 11 Feb 2026 21:14:16 -0800
Subject: [PATCH 19/21] cleanup rendering

---
 Makefile       | 5 ++++-
 nf_workflow.nf | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 8c0d434..1edcc5d 100644
--- a/Makefile
+++ b/Makefile
@@ -6,4 +6,7 @@ run_usi_download:
 	--download_usi_filename=./data/usi_files/input_files.tsv --cache_directory=./data/cache
 
 run_transitive:
-	nextflow run ./nf_workflow.nf --resume -c nextflow.config --topology=transitive
\ No newline at end of file
+	nextflow run ./nf_workflow.nf --resume -c nextflow.config --topology=transitive
+
+init_modules:
+	git submodule update --init --recursive
\ No newline at end of file
diff --git a/nf_workflow.nf b/nf_workflow.nf
index 2095410..1ee91d0 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -641,7 +641,7 @@ workflow {
     library_summary_ch = summaryLibrary(libraries_ch)
 
     // Merging all these tsv files from library_summary_ch within nextflow
-    library_summary_ch.collectFile(name: 'librarysummary.tsv', keepHeader: true, storeDir: _publishdir + "/librarysummary")
+    library_summary_merged_ch = library_summary_ch.collectFile(name: 'librarysummary.tsv', keepHeader: true, storeDir: _publishdir + "/librarysummary")
     library_summary_merged_ch = library_summary_merged_ch.ifEmpty(file("NO_FILE"))
 
     gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch)

From 1da51dc6a7789529e440b3ab4e15dd131effb56c Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 11 Feb 2026 21:17:27 -0800
Subject: [PATCH 20/21] cleanup

---
 nf_workflow.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nf_workflow.nf b/nf_workflow.nf
index 1ee91d0..77e2b5e 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -575,7 +575,7 @@ process createFeatureTable {
 
 }
 
-process PrepareForModiFinder{
+process prepareForModiFinder{
     publishDir "$params.publishdir/nf_output", mode: 'copy'
 
     errorStrategy 'ignore'
@@ -701,6 +701,6 @@ workflow {
     createFeatureTable(clusterinfo_ch)
 
     // Preparing for Modifinder
-    PrepareForModiFinder(gnps_library_results_ch, filtered_networking_pairs_enriched_ch)
+    prepareForModiFinder(gnps_library_results_ch, filtered_networking_pairs_enriched_ch)
 
 }
\ No newline at end of file

From 1d84df19377d51d01f07eb9fab10a0716fe9b5d2 Mon Sep 17 00:00:00 2001
From: Mingxun Wang <mwang87@gmail.com>
Date: Wed, 11 Feb 2026 22:02:58 -0800
Subject: [PATCH 21/21] fixing linkouts for private libraries

---
 bin/NextflowModules  |  2 +-
 nf_workflow.nf       | 39 +++++++++++++++++++++------------------
 workflowdisplay.yaml | 15 +++++++++++----
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/bin/NextflowModules b/bin/NextflowModules
index ad68182..f357f3a 160000
--- a/bin/NextflowModules
+++ b/bin/NextflowModules
@@ -1 +1 @@
-Subproject commit ad681820f3795f45c772946b9b3b6da1baac580b
+Subproject commit f357f3ae993553b011347a5af9d46d9318ff7b20
diff --git a/nf_workflow.nf b/nf_workflow.nf
index 77e2b5e..2316d93 100644
--- a/nf_workflow.nf
+++ b/nf_workflow.nf
@@ -93,6 +93,7 @@ _publishdir = "${_publishdir}/nf_output"
 // A lot of useful modules are already implemented and added to the nextflow modules, you can import them to use
 // the publishdir is a key word that we're using around all our modules to control where the output files will be saved
 include {summaryLibrary} from "$MODULES_FOLDER/nf_library_search_modules.nf"
+include {librarygetGNPSAnnotations} from "$MODULES_FOLDER/nf_library_search_modules.nf" addParams(publishdir: "$_publishdir/library")
 
 process filesummary {
     publishDir "$_publishdir", mode: 'copy'
@@ -242,28 +243,28 @@ process librarymergeResults {
     """
 }
 
-process librarygetGNPSAnnotations {
-    publishDir "$params.publishdir/nf_output/library", mode: 'copy'
+// process librarygetGNPSAnnotations {
+//     publishDir "$params.publishdir/nf_output/library", mode: 'copy'
 
-    //cache 'lenient'
-    cache 'false'
+//     //cache 'lenient'
+//     cache 'false'
 
-    conda "$TOOL_FOLDER/conda_env.yml"
+//     conda "$TOOL_FOLDER/conda_env.yml"
 
-    input:
-    path "merged_results.tsv"
-    path "library_summary.tsv"
+//     input:
+//     path "merged_results.tsv"
+//     path "library_summary.tsv"
 
-    output:
-    path 'merged_results_with_gnps.tsv'
+//     output:
+//     path 'merged_results_with_gnps.tsv'
 
-    """
-    python $TOOL_FOLDER/scripts/getGNPS_library_annotations.py \
-    merged_results.tsv \
-    merged_results_with_gnps.tsv \
-    --librarysummary library_summary.tsv
-    """
-}
+//     """
+//     python $TOOL_FOLDER/scripts/getGNPS_library_annotations.py \
+//     merged_results.tsv \
+//     merged_results_with_gnps.tsv \
+//     --librarysummary library_summary.tsv
+//     """
+// }
 
 // Molecular Networking
 process networkingGNPSPrepParams {
@@ -644,7 +645,9 @@ workflow {
     library_summary_merged_ch = library_summary_ch.collectFile(name: 'librarysummary.tsv', keepHeader: true, storeDir: _publishdir + "/librarysummary")
     library_summary_merged_ch = library_summary_merged_ch.ifEmpty(file("NO_FILE"))
 
-    gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch)
+    // Getting library annotations
+    force_offline = "No" // This can be set to Yes to avoid any online queries to GNPS, which is useful for testing or if you have a local copy of the GNPS library
+    gnps_library_results_ch = librarygetGNPSAnnotations(merged_results_ch, library_summary_merged_ch, "1", "0", force_offline)
     gnps_library_results_ch = gnps_library_results_ch.ifEmpty(file("NO_FILE"))
 
     // Networking
diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml
index 434f0e3..91a5c44 100644
--- a/workflowdisplay.yaml
+++ b/workflowdisplay.yaml
@@ -98,7 +98,11 @@ Views:
             -   title: "SpectrumID"
                 data: SpectrumID
             -   title: "Smiles"
-                data: Smiles    
+                data: Smiles
+            -   title: "Library Name"
+                data: LibraryName
+            -   title: "Library Scan"
+                data: LibScan
         columnDefs: '[ {"targets": 0,"data": null,"render": function ( data, type, row, meta ) {
         if (row["SpectrumID"] && row["SpectrumID"].includes("CCMSLIB")) {
             return `
@@ -106,12 +110,15 @@ Views:
             `;
         }
         return `
-            <span>CCMSLIB view coming soon</span>
+            <a target="_blank" href="https://metabolomics-usi.gnps2.org/dashinterface/?usi1=mzspec:GNPS2:TASK-${task}-nf_output/clustering/specs_ms.mgf:scan:${row["#Scan#"]}&usi2=mzspec:GNPS2:TASK-${task}-input_libraries/${row["LibraryName"]}:scan:${row["LibScan"]}">View Mirror</a>
         `;}},
         {"targets": 10,"data": null,"render": function ( data, type, row, meta ) {
         return `
             <img src="https://structure.gnps2.org/structureimg?smiles=${encodeURIComponent(row["Smiles"])}"/>
-        `;}},]'
+        `;}},
+        {"targets": [11, 12], "visible": false}]'
+
+
 
 -   name: Network Components List
     displayname: Network Components List
@@ -362,4 +369,4 @@ Views:
     viewname: metadatadownload
     displaytype: download
     parameters:
-        filename: nf_output/metadata/merged_metadata.tsv
\ No newline at end of file
+        filename: nf_output/metadata/merged_metadata.tsvtsv
\ No newline at end of file