diff --git a/nextflow/modules/bamtools/merge/main.nf b/nextflow/modules/bamtools/merge/main.nf new file mode 100644 index 0000000..530ab57 --- /dev/null +++ b/nextflow/modules/bamtools/merge/main.nf @@ -0,0 +1,16 @@ +process bamtools_merge { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.5.1' + + input: + tuple val(sample), val(bam) + + output: + path "${sample}.bam", emit: bam + + shell: + """ + bamtools merge -out ${sample}.bam -in ${bam} + """ +} \ No newline at end of file diff --git a/nextflow/modules/ccs/main.nf b/nextflow/modules/ccs/main.nf new file mode 100644 index 0000000..706da37 --- /dev/null +++ b/nextflow/modules/ccs/main.nf @@ -0,0 +1,16 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::pbccs=6.0.0' + + input: + path bam + + output: + path 'ccs.bam', emit: bam + + shell: + """ + ccs ${bam} ccs.bam + """ +} \ No newline at end of file diff --git a/nextflow/modules/lima/demux/main.nf b/nextflow/modules/lima/demux/main.nf new file mode 100644 index 0000000..c26ea50 --- /dev/null +++ b/nextflow/modules/lima/demux/main.nf @@ -0,0 +1,29 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.0.0' + + input: + path ccs_bam + path barcodes_fasta + + output: + path 'demux.*', emit: bams + + shell: + def cores = 16 + if (task.cpus) { + cores = (task.cpus as int) + } + """ + lima --num-threads ${cores} \ + --split-bam-named \ + --different \ + --ccs \ + --min-score-lead 10 \ + --min-score 80 \ + ${ccs_bam} \ + ${barcodes_fasta} \ + demux.bam + """ +} \ No newline at end of file diff --git a/nextflow/modules/lima/trim_amplicons/main.nf b/nextflow/modules/lima/trim_amplicons/main.nf new file mode 100644 index 0000000..51a9383 --- /dev/null +++ b/nextflow/modules/lima/trim_amplicons/main.nf @@ -0,0 +1,34 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.0.0' + + input: + path demux_bam + path amplicon_primers_fasta + + output: + path 'output.bam', emit: bams + + shell: + def cores = 16 + if (task.cpus) { + cores = (task.cpus as int) + } + def primers_arg = "--neighbors" + if (params.lima_trim_amplicons_neighbors) primers_arg = "--neighbors" + else if (params.lima_trim_amplicons_different) primers_arg = "--different" + else log.info "[lima trim amplicons] defaulting to use --neighbors" + """ + lima \ + --num-threads ${cores} \ + -j 30 \ + --${primers_arg} \ + --ccs \ + --min-score-lead 10 \ + --min-score 80 + ${demux_bam} \ + ${amplicon_primers_fasta} \ + output.bam + """ +} \ No newline at end of file diff --git a/nextflow/workflow/main.nf b/nextflow/workflow/main.nf new file mode 100644 index 0000000..40bd7da --- /dev/null +++ b/nextflow/workflow/main.nf @@ -0,0 +1,109 @@ +#!/usr/bin/env nextflow + +// DSL2 is used +nextflow.enable.dsl=2 + +/*----------------------------------------------------------------------------- + Pipeline Processes (includes) +-----------------------------------------------------------------------------*/ + +include { ccs } from '../modules/ccs/main' +include { lima_demux } from '../modules/lima/demux/main' +include { lima_trim_amplicons } from '../modules/lima/trim_amplicons/main' +include { combine_demux_by_patient } from '../modules/combine_demux_by_patient/main' +include { bamtools_merge } from '../modules/bamtools/merge/main' + +/*----------------------------------------------------------------------------- + Pipeline Parameters +-----------------------------------------------------------------------------*/ + +if (!params.subreads_bam) { + exit 1, "[Pipeline Error] Missing parameter 'subreads_bam'\n" +} + +if (!params.sample_metadata) { + exit 1, "[Pipeline Error] Missing parameter 'sample_metadata'\n" +} + +if (!params.amplicon_primers_fasta) { + exit 1, "[Pipeline Error] Missing parameter 'amplicon_primers_fasta'\n" +} + +if (!params.outdir) { + exit 1, "[Pipeline Error] Missing parameter 'outdir'\n" +} + +/*----------------------------------------------------------------------------- + Main Workflow +-----------------------------------------------------------------------------*/ + +workflow { + + // [Channel] the input subreads BAM (eg. .subreads.bam or movie>..hifi_reads.bam) + ch_in_subreads_bam = Channel.fromPath(params.subreads_bam) + + // [Channel] the input amplicon primers FASTA + // Please note the requirement of having adjacent F/R pairs for the --neighbors algorithm + ch_in_amplicon_primers_fasta = Channel.fromPath(params.amplicon_primers_fasta) + + // [Channel] the sample metadata. Should contain: + // "Sample", "BarcodeF", "BarcodeFName", "BarcodeR", "BarcodeRName" + ch_in_metadata = Channel + .fromPath(params.sample_metadata) + .splitCsv(header: true) + + // [Channel] the barcode FASTA file, one entry per input barcode + ch_in_barcodes_fasta = ch_in_metadata + .map { row -> + ">${row.BarcodeFName}\n${row.BarcodeF}\n>${row.BarcodeRName}\n${row.BarcodeR}" + } + .collectFile(name: 'barcodes.fasta', newLine: true) + + // [Process] call the consensus reads from the subreads + ccs(ch_in_subreads_bam) + + // [Process] demultiplex the CCS reads + lima_demux(ccs.out.subreads_bam, ch_in_barcodes_fasta) + + // [Channel] build tuples of barcode key and BAM file + ch_lima_bams = demux.out.bams.flatten().map { bam -> + ["${bam.baseName}".substring("demux.".length()), bam] + } + + // [[Channel] transform the metadata to tuples of barcode key and sample + ch_in_lima_outputs = ch_in_metadata + .map { row -> + ["${row.BarcodeFName}--${row.BarcodeRName}", row.Sample] + } + + // [[Channel] gather all BAMs for the same sample + ch_bams_by_sample = ch_in_lima_outputs + .join(ch_lima_bams) // join by barcode name key + .map { key, sample, bam -> [sample, bam] } // discard the key + .groupTuple() + + // [Process] combine into per-patient data + bamtools_merge(ch_bams_by_sample) + + // [Process] trim amplicon primers + lima_demux(bamtools_merge.out.bam, ch_in_amplicon_primers_fasta) + + // [Process] variant calling + // TODO: support parameterizign variant callers + // TODO: support bcftools + // TODO: support DeepVariant + // TODO: support pbaa + + // [Process] generate consensus sequence using VCFCons + // TODO: samtools depth + // TODO: run VCFCons + + // [Process] Assign lineages using Pangolin or Nextclade + // TODO + + // TODO: other miscellaneous items + // - downsample reads by amplicon + // - generate per amplicon coverage BED file + // + +} \ No newline at end of file