diff --git a/bin/reformat_ichorcna_seg.py b/bin/reformat_ichorcna_seg.py
new file mode 100755
index 0000000..2e9fc13
--- /dev/null
+++ b/bin/reformat_ichorcna_seg.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Reformat an ichorCNA .seg.txt table for CX signature analysis."""
+
+import argparse
+import sys
+
+import pandas as pd
+
+# Input columns, in output order, mapped to the names written to the output.
+COLUMN_NAMES = {
+    "chrom": "chromosome",
+    "start": "start",
+    "end": "end",
+    "logR_Copy_Number": "segVal",
+    "ID": "sample",
+}
+NUMERIC_COLUMNS = ["start", "end", "segVal"]
+
+
+def reformat_segments(df, source="input"):
+    """Select, rename and clean the segment columns of an ichorCNA table.
+
+    Args:
+        df: Table read from an ichorCNA .seg.txt file.
+        source: Name of the input, only used in the error message.
+
+    Returns:
+        A new DataFrame with the columns chromosome, start, end, segVal and
+        sample; rows whose segVal is missing or not numeric are removed.
+
+    Raises:
+        ValueError: If a mandatory input column is missing.
+    """
+    missing = [col for col in COLUMN_NAMES if col not in df.columns]
+    if missing:
+        raise ValueError(
+            f"These mandatory columns are missing from {source}: {missing}"
+        )
+
+    df_out = df.loc[:, list(COLUMN_NAMES)].rename(columns=COLUMN_NAMES)
+
+    # Non-numeric values (e.g. "NA") become NaN instead of raising.
+    for col in NUMERIC_COLUMNS:
+        df_out[col] = pd.to_numeric(df_out[col], errors="coerce")
+
+    return df_out.dropna(subset=["segVal"])
+
+
+def main():
+    """Parse the command line, reformat the table and write it out."""
+    parser = argparse.ArgumentParser(description="Format ichorCNA .seg.txt file ")
+    parser.add_argument("-s", "--segmentation_file", required=True, help="Input ichorCNA .seg file")
+    parser.add_argument("-o", "--output", default=None, help="Output formatted .seg file (default: stdout)")
+    args = parser.parse_args()
+
+    # Read every column as text; numeric conversion is done explicitly later.
+    df = pd.read_csv(args.segmentation_file, sep="\t", dtype=str)
+    df_out = reformat_segments(df, source=args.segmentation_file)
+
+    # to_csv(None) only returns a string, so fall back to stdout explicitly.
+    df_out.to_csv(args.output or sys.stdout, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/conf/modules.config b/conf/modules.config
index 73bc719..c6824c7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -335,6 +335,15 @@ process {
         ext.when = params.ichorcna_ploidy_aware_plot
     }
 
+    withName: FORMAT_ICHORCNA_SEG {
+        publishDir = [
+            path: { "${params.outdir}/ichorcna/formatted_segmentation_files" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+        ext.when = params.compute_signatures
+    }
+
     withName: CONCATENATE_QDNASEQ_PLOTS {
         publishDir = [
             path: { "${params.outdir}/cn_plots/qdnaseq/" },
diff --git a/modules/local/format_ichorcna_seg/main.nf b/modules/local/format_ichorcna_seg/main.nf
new file mode 100644
index 0000000..0000b6a
--- /dev/null
+++ b/modules/local/format_ichorcna_seg/main.nf
@@ -0,0 +1,29 @@
+// Reformat one ichorCNA .seg.txt file with reformat_ichorcna_seg.py.
+process FORMAT_ICHORCNA_SEG {
+    tag "$meta.id"
+    container "quay.io/einar_rainhart/pandas-pandera:1.5.3"
+    label 'process_low'
+
+    input:
+    tuple val(meta), path(seg)
+
+    output:
+    path "${meta.id}_formatted.seg", emit: seg
+    path "versions.yml"            , emit: versions
+
+    // Without this guard the `ext.when` set in conf/modules.config is never evaluated.
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def VERSION = '0.0.1'
+
+    """
+    reformat_ichorcna_seg.py -s $seg --output ${meta.id}_formatted.seg
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        format_ichorcna: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/ichorcna/main.nf b/subworkflows/local/ichorcna/main.nf
index a2cd9f1..43896f0 100644
--- a/subworkflows/local/ichorcna/main.nf
+++ b/subworkflows/local/ichorcna/main.nf
@@ -4,6 +4,7 @@ include { HMMCOPY_READCOUNTER as HMMCOPY_READCOUNTER_ICHORCNA } from '../../../m
 include { CORRECT_LOGR_ICHORCNA } from '../../../modules/local/correct_logR_ichorcna/main'
 include { CONCATENATE_PDF as CONCATENATE_BIN_PLOTS } from '../../../modules/local/concatenate_pdf/main'
 include { PLOT_ICHORCNA } from '../../../modules/local/plot_ichorcna/main'
+include { FORMAT_ICHORCNA_SEG } from '../../../modules/local/format_ichorcna_seg/main'
 
 workflow ICHORCNA {
     take:
@@ -76,11 +77,25 @@ workflow ICHORCNA {
     CONCATENATE_BIN_PLOTS(ICHORCNA_RUN.out.genome_plot.collect { _meta, plot -> plot })
     ch_versions = ch_versions.mix(CONCATENATE_BIN_PLOTS.out.versions)
 
+    // Reformat each sample's ichorCNA segments for signature analysis
+    FORMAT_ICHORCNA_SEG(ICHORCNA_RUN.out.seg_txt)
+    ch_versions = ch_versions.mix(FORMAT_ICHORCNA_SEG.out.versions)
+
+    // Merge the per-sample tables into one file, keeping a single header line
+    signature_file_ichor = FORMAT_ICHORCNA_SEG.out.seg
+        .collectFile(
+            storeDir: "${params.outdir}/ichorcna/",
+            name: "all_segments_ichorcna_signatures.seg",
+            keepHeader: true,
+            skip: 1
+        )
+
     emit:
-    versions    = ch_versions
-    summary     = ch_reports
-    ch_segments = called_segments
-    ch_bins     = bins
-    gistic_file = corrected_gistic_file
-    genome_plot = genome_plot
+    versions       = ch_versions
+    summary        = ch_reports
+    ch_segments    = called_segments
+    ch_bins        = bins
+    gistic_file    = corrected_gistic_file
+    genome_plot    = genome_plot
+    signature_file = signature_file_ichor
 }
diff --git a/subworkflows/local/solid_biopsy/main.nf b/subworkflows/local/solid_biopsy/main.nf
index e92b8bc..fccb90f 100644
--- a/subworkflows/local/solid_biopsy/main.nf
+++ b/subworkflows/local/solid_biopsy/main.nf
@@ -157,6 +157,11 @@ workflow SOLID_BIOPSY {
             corrected_gistic_file = ICHORCNA.out.gistic_file
             ch_reports = ch_versions.mix(ICHORCNA.out.summary)
             ch_versions = ch_versions.mix(ICHORCNA.out.versions)
+
+            // FIXME: Compute signatures (duplication with ASCAT.sc)
+            CIN_SIGNATURE_QUANTIFICATION(ICHORCNA.out.signature_file)
+            ch_versions = ch_versions.mix(CIN_SIGNATURE_QUANTIFICATION.out.versions)
+            ch_reports = ch_reports.mix(CIN_SIGNATURE_QUANTIFICATION.out.sig_activity_plot)
         }
         else {
             error("Unknown CNV caller ${caller}")