Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions bin/reformat_ichorcna_seg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python3

import argparse
import pandas as pd

def main():
    """Reformat an ichorCNA ``.seg.txt`` table into a signature-ready file.

    Command-line arguments:
        -s / --segmentation_file: tab-separated ichorCNA segmentation file
            (required). It must contain the columns ``chrom``, ``start``,
            ``end``, ``logR_Copy_Number`` and ``ID``.
        -o / --output: path of the formatted file. When omitted, the table
            is written to standard output instead of being silently dropped.

    The output is tab-separated with the columns ``chromosome``, ``start``,
    ``end``, ``segVal`` and ``sample``. Rows whose ``segVal`` cannot be parsed
    as a number are removed.

    Raises:
        ValueError: if any of the required input columns is missing.
    """
    # Local import: only needed for the stdout fallback below.
    import sys

    parser = argparse.ArgumentParser(description="Format ichorCNA .seg.txt file ")
    parser.add_argument("-s", "--segmentation_file", required=True, help="Input ichorCNA .seg file")
    parser.add_argument("-o", "--output", default=None, help="Output formatted .seg file")
    args = parser.parse_args()

    # Read everything as text first; numeric columns are converted explicitly
    # further down so that unparsable values become NaN instead of raising.
    df = pd.read_csv(args.segmentation_file, sep="\t", dtype=str)

    # Check columns in .seg.txt file
    required_cols = ["chrom", "start", "end", "logR_Copy_Number", "ID"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(
            f"Mandatory columns are missing in {args.segmentation_file}: {missing}"
        )

    # Rename columns as required for CX signatures
    df_out = df.loc[:, required_cols].rename(
        columns={
            "chrom": "chromosome",
            "logR_Copy_Number": "segVal",
            "ID": "sample",
        }
    )

    # Convert to numeric
    for col in ["start", "end", "segVal"]:
        df_out[col] = pd.to_numeric(df_out[col], errors="coerce")

    # Remove segments without a usable copy-number value.
    # NOTE(review): rows with unparsable start/end are kept (as empty cells);
    # confirm whether downstream tools tolerate that.
    df_out = df_out.dropna(subset=["segVal"])

    # DataFrame.to_csv(None) only *returns* the text, so without an explicit
    # destination nothing would be written at all. Fall back to stdout.
    destination = sys.stdout if args.output is None else args.output

    # Save formatted segmentation file
    df_out.to_csv(destination, sep="\t", index=False)


if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,15 @@ process {
ext.when = params.ichorcna_ploidy_aware_plot
}

// Settings applied to the FORMAT_ICHORCNA_SEG process.
withName: FORMAT_ICHORCNA_SEG {
// Publish the process outputs under <outdir>/ichorcna/formatted_segmentation_files.
publishDir = [
path: { "${params.outdir}/ichorcna/formatted_segmentation_files" },
mode: params.publish_dir_mode,
// Returning null from saveAs skips publishing that file, so versions.yml is not copied.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
// Exposed to the process as task.ext.when; it only has an effect if the
// process declares a `when:` block that reads it.
ext.when = params.compute_signatures
}

withName: CONCATENATE_QDNASEQ_PLOTS {
publishDir = [
path: { "${params.outdir}/cn_plots/qdnaseq/" },
Expand Down
23 changes: 23 additions & 0 deletions modules/local/format_ichorcna_seg/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Reformat one sample's ichorCNA segmentation table with
// reformat_ichorcna_seg.py and emit it as "<meta.id>_formatted.seg".
process FORMAT_ICHORCNA_SEG {
    tag "$meta.id"
    container "quay.io/einar_rainhart/pandas-pandera:1.5.3"
    label 'process_low'

    input:
    // meta: sample metadata map (only meta.id is used here)
    // seg:  ichorCNA segmentation file passed to the script via -s
    tuple val(meta), path(seg)

    output:
    // Kept as the single declared output so that the bare process call
    // still returns this channel directly to existing callers.
    path "${meta.id}_formatted.seg", emit: seg

    when:
    // Honour `ext.when` from the configuration; without this block the
    // setting is ignored and the process always runs.
    task.ext.when == null || task.ext.when

    script:
    // Version string reported in versions.yml; hard-coded in this module.
    def VERSION = '0.0.1'

    // Use the full --output flag: the abbreviated `--o` only works through
    // argparse prefix matching and breaks if another --o* option is added.
    // NOTE(review): versions.yml is written but not declared as an output.
    """
    reformat_ichorcna_seg.py -s $seg --output ${meta.id}_formatted.seg

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        format_ichorcna: ${VERSION}
    END_VERSIONS
    """
}
28 changes: 22 additions & 6 deletions subworkflows/local/ichorcna/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include { HMMCOPY_READCOUNTER as HMMCOPY_READCOUNTER_ICHORCNA } from '../../../m
include { CORRECT_LOGR_ICHORCNA } from '../../../modules/local/correct_logR_ichorcna/main'
include { CONCATENATE_PDF as CONCATENATE_BIN_PLOTS } from '../../../modules/local/concatenate_pdf/main'
include { PLOT_ICHORCNA } from '../../../modules/local/plot_ichorcna/main'
include { FORMAT_ICHORCNA_SEG } from '../../../modules/local/format_ichorcna_seg/main'

workflow ICHORCNA {
take:
Expand Down Expand Up @@ -76,11 +77,26 @@ workflow ICHORCNA {
CONCATENATE_BIN_PLOTS(ICHORCNA_RUN.out.genome_plot.collect { _meta, plot -> plot })
ch_versions = ch_versions.mix(CONCATENATE_BIN_PLOTS.out.versions)

// Create file for signature analysis

formatted_ichor = FORMAT_ICHORCNA_SEG(ICHORCNA_RUN.out.seg_txt)

signature_file_ichor = formatted_ichor
.collectFile(
storeDir: "${params.outdir}/ichorcna/",
name: "all_segments_ichorcna_signatures.seg",
keepHeader: true,
skip: 1
)



emit:
versions = ch_versions
summary = ch_reports
ch_segments = called_segments
ch_bins = bins
gistic_file = corrected_gistic_file
genome_plot = genome_plot
versions = ch_versions
summary = ch_reports
ch_segments = called_segments
ch_bins = bins
gistic_file = corrected_gistic_file
genome_plot = genome_plot
signature_file = signature_file_ichor
}
5 changes: 5 additions & 0 deletions subworkflows/local/solid_biopsy/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ workflow SOLID_BIOPSY {
corrected_gistic_file = ICHORCNA.out.gistic_file
ch_reports = ch_versions.mix(ICHORCNA.out.summary)
ch_versions = ch_versions.mix(ICHORCNA.out.versions)

// FIXME: Compute signatures (duplication with ASCAT.sc)
CIN_SIGNATURE_QUANTIFICATION(ICHORCNA.out.signature_file)
ch_versions = ch_versions.mix(CIN_SIGNATURE_QUANTIFICATION.out.versions)
ch_reports = ch_reports.mix(CIN_SIGNATURE_QUANTIFICATION.out.sig_activity_plot)
}
else {
error("Unknown CNV caller ${caller}")
Expand Down