From 2f97e13129ebf06e9c2ea38af37b56b0d8d281d0 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 19 Feb 2021 17:12:35 -0800 Subject: [PATCH 1/4] [feature] WIP: automate with nextflow --- nextflow/modules/ccs/main.nf | 16 ++++ .../cosa/combine_demux_by_patient/main.nf | 0 nextflow/modules/lima/main.nf | 27 +++++++ nextflow/workflow/main.nf | 74 +++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 nextflow/modules/ccs/main.nf create mode 100644 nextflow/modules/cosa/combine_demux_by_patient/main.nf create mode 100644 nextflow/modules/lima/main.nf create mode 100644 nextflow/workflow/main.nf diff --git a/nextflow/modules/ccs/main.nf b/nextflow/modules/ccs/main.nf new file mode 100644 index 0000000..706da37 --- /dev/null +++ b/nextflow/modules/ccs/main.nf @@ -0,0 +1,16 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::pbccs=6.0.0' + + input: + path bam + + output: + path 'ccs.bam', emit: bam + + shell: + """ + ccs ${bam} ccs.bam + """ +} \ No newline at end of file diff --git a/nextflow/modules/cosa/combine_demux_by_patient/main.nf b/nextflow/modules/cosa/combine_demux_by_patient/main.nf new file mode 100644 index 0000000..e69de29 diff --git a/nextflow/modules/lima/main.nf b/nextflow/modules/lima/main.nf new file mode 100644 index 0000000..48f454a --- /dev/null +++ b/nextflow/modules/lima/main.nf @@ -0,0 +1,27 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.0.0' + + input: + path ccs_bam + + output: + path 'demux.*', emit: bam + + shell: + def cores = 16 + if (task.cpus) { + cores = (task.cpus as int) + } + """ + lima --num-threads ${cores} \ + --split-bam-named \ + --different \ + --ccs \ + --min-score-lead 10 \ + --min-score 80 \ + ${ccs_bam} \ + demux.bam + """ +} \ No newline at end of file diff --git a/nextflow/workflow/main.nf b/nextflow/workflow/main.nf new file mode 100644 index 0000000..e7c495a --- /dev/null +++ b/nextflow/workflow/main.nf @@ -0,0 +1,74 @@ +#!/usr/bin/env nextflow + +// DSL2 is used +nextflow.enable.dsl=2 + +/*----------------------------------------------------------------------------- + Pipeline Processes (includes) +-----------------------------------------------------------------------------*/ + +included { ccs } from '../modules/ccs/main' +included { lima } from '../modules/lima/main' +included { combine_demux_by_patient } from '../modules/combine_demux_by_patient/main' + + +/*----------------------------------------------------------------------------- + Pipeline Parameters +-----------------------------------------------------------------------------*/ + +if (!params.subreads_bam) { + exit 1, "[Pipeline Error] Missing parameter 'subreads_bam'\n" +} + +if (!params.outdir) { + exit 1, "[Pipeline Error] Missing parameter 'outdir'\n" +} + +/*----------------------------------------------------------------------------- + Main Workflow +-----------------------------------------------------------------------------*/ + +workflow { + // [Channel] the input subreads BAM (eg. .subreads.bam or movie>..hifi_reads.bam) + ch_in_subreads_bam = Channel.fromPath(params.subreads_bam) + + // [Process] call the consensus reads from the subreads + ccs(ch_in_subreads_bam) + + // [Process] demultiplex the CCS reads + lima(ccs.out.subreads_bam) + + // [Process] combine into per-patient data + // TODO generate the tab-delimited data file + // TODO run combine_demux_by_patient + + // [Process] trim amplicon primers + // TODO: support --neigbhors or --different + /* + lima -j 30 --split-bam-named \ + --neighbors \ + --ccs \ + --min-score-lead 10 \ + --min-score 80 + ccs.bam amplicon_primers.fasta output.bam + */ + + // [Process] variant calling + // TODO: support parameterizign variant callers + // TODO: support bcftools + // TODO: support DeepVariant + // TODO: support pbaa + + // [Process] generate consensus sequence using VCFCons + // TODO: samtools depth + // TODO: run VCFCons + + // [Process] Assign lineages using Pangolin or Nextclade + // TODO + + // TODO: other miscellaneous items + // - downsample reads by amplicon + // - generate per amplicon coverage BED file + // + +} \ No newline at end of file From 88e58bdd0a5efa931cd4aaa4d29a63b6e0add059 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Sat, 20 Feb 2021 10:22:40 -0800 Subject: [PATCH 2/4] brainstorming --- nextflow/modules/bamtools/merge/main.nf | 16 ++++++ .../cosa/combine_demux_by_patient/main.nf | 0 nextflow/modules/lima/main.nf | 4 +- nextflow/workflow/main.nf | 49 +++++++++++++------ 4 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 nextflow/modules/bamtools/merge/main.nf delete mode 100644 nextflow/modules/cosa/combine_demux_by_patient/main.nf diff --git a/nextflow/modules/bamtools/merge/main.nf b/nextflow/modules/bamtools/merge/main.nf new file mode 100644 index 0000000..530ab57 --- /dev/null +++ b/nextflow/modules/bamtools/merge/main.nf @@ -0,0 +1,16 @@ +process bamtools_merge { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.5.1' + + input: + tuple val(sample), val(bam) + + output: + path "${sample}.bam", emit: bam + + shell: + """ + bamtools merge -out ${sample}.bam -in ${bam} + """ +} \ No newline at end of file diff --git a/nextflow/modules/cosa/combine_demux_by_patient/main.nf b/nextflow/modules/cosa/combine_demux_by_patient/main.nf deleted file mode 100644 index e69de29..0000000 diff --git a/nextflow/modules/lima/main.nf b/nextflow/modules/lima/main.nf index 48f454a..c26ea50 100644 --- a/nextflow/modules/lima/main.nf +++ b/nextflow/modules/lima/main.nf @@ -5,9 +5,10 @@ process ccs { input: path ccs_bam + path barcodes_fasta output: - path 'demux.*', emit: bam + path 'demux.*', emit: bams shell: def cores = 16 @@ -22,6 +23,7 @@ process ccs { --min-score-lead 10 \ --min-score 80 \ ${ccs_bam} \ + ${barcodes_fasta} \ demux.bam """ } \ No newline at end of file diff --git a/nextflow/workflow/main.nf b/nextflow/workflow/main.nf index e7c495a..4471abe 100644 --- a/nextflow/workflow/main.nf +++ b/nextflow/workflow/main.nf @@ -7,10 +7,10 @@ nextflow.enable.dsl=2 Pipeline Processes (includes) -----------------------------------------------------------------------------*/ -included { ccs } from '../modules/ccs/main' -included { lima } from '../modules/lima/main' -included { combine_demux_by_patient } from '../modules/combine_demux_by_patient/main' - +include { ccs } from '../modules/ccs/main' +include { lima } from '../modules/lima/main' +include { combine_demux_by_patient } from '../modules/combine_demux_by_patient/main' +include { bamtools_merge } from '../modules/bamtools/merge/main' /*----------------------------------------------------------------------------- Pipeline Parameters @@ -29,29 +29,48 @@ if (!params.outdir) { -----------------------------------------------------------------------------*/ workflow { + // [Channel] the input subreads BAM (eg. .subreads.bam or movie>..hifi_reads.bam) ch_in_subreads_bam = Channel.fromPath(params.subreads_bam) + // [Channel] the sample metadata. Should contain: + // "Sample", "BarcodeF", "BarcodeFName", "BarcodeR", "BarcodeRName" + ch_in_metadata = Channel + .fromPath(params.sample_metadata) + .splitCsv(header: true) + + // [Channel] the barcode FASTA file, one entry per input barcode + ch_in_barcodes_fasta = ch_in_metadata + .collectFile(name: 'barcodes.fasta', newLine: true) + // [Process] call the consensus reads from the subreads ccs(ch_in_subreads_bam) // [Process] demultiplex the CCS reads - lima(ccs.out.subreads_bam) + lima(ccs.out.subreads_bam, ch_in_barcodes_fasta) + + // [Channel] build tuples of barcode key and BAM file + ch_lima_bams = lima.out.bams.map { bam -> + (bam.baseName.substring("demux.".length()), bam) + } + + // [[Channel] transform the metadata to tuples of barcode key and sample + ch_in_lima_outputs = ch_in_metadata + .map { (sample, barcodeF, barcodeFName, barcodeR, barcodeRName) -> + ("${barcodeFName}--${barcodeRName}", sample) + } + + // [[Channel] gather all BAMs for the same sample + ch_bams_by_sample = ch_in_lima_outputs + .join(ch_lima_bams) // join by barcode name key + .map { (key, sample, bam) -> (sample, bam) } // discard the key + .groupTuple() // collect all BAMs by sample name // [Process] combine into per-patient data - // TODO generate the tab-delimited data file - // TODO run combine_demux_by_patient + bamtools_merge(ch_bams_by_sample) // [Process] trim amplicon primers // TODO: support --neigbhors or --different - /* - lima -j 30 --split-bam-named \ - --neighbors \ - --ccs \ - --min-score-lead 10 \ - --min-score 80 - ccs.bam amplicon_primers.fasta output.bam - */ // [Process] variant calling // TODO: support parameterizign variant callers From bd3d98258bd0ab1dc73c57ffd623870598e942e9 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Sat, 20 Feb 2021 21:48:31 -0800 Subject: [PATCH 3/4] fix channel transforms to group BAMs to merge --- nextflow/workflow/main.nf | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/nextflow/workflow/main.nf b/nextflow/workflow/main.nf index 4471abe..f662cef 100644 --- a/nextflow/workflow/main.nf +++ b/nextflow/workflow/main.nf @@ -41,6 +41,9 @@ workflow { // [Channel] the barcode FASTA file, one entry per input barcode ch_in_barcodes_fasta = ch_in_metadata + .map { row -> + ">${row.BarcodeFName}\n${row.BarcodeF}\n>${row.BarcodeRName}\n${row.BarcodeR}" + } .collectFile(name: 'barcodes.fasta', newLine: true) // [Process] call the consensus reads from the subreads @@ -50,21 +53,21 @@ workflow { lima(ccs.out.subreads_bam, ch_in_barcodes_fasta) // [Channel] build tuples of barcode key and BAM file - ch_lima_bams = lima.out.bams.map { bam -> - (bam.baseName.substring("demux.".length()), bam) + ch_lima_bams = demux.out.bams.flatten().map { bam -> + ["${bam.baseName}".substring("demux.".length()), bam] } // [[Channel] transform the metadata to tuples of barcode key and sample ch_in_lima_outputs = ch_in_metadata - .map { (sample, barcodeF, barcodeFName, barcodeR, barcodeRName) -> - ("${barcodeFName}--${barcodeRName}", sample) + .map { row -> + ["${row.BarcodeFName}--${row.BarcodeRName}", row.Sample] } // [[Channel] gather all BAMs for the same sample ch_bams_by_sample = ch_in_lima_outputs - .join(ch_lima_bams) // join by barcode name key - .map { (key, sample, bam) -> (sample, bam) } // discard the key - .groupTuple() // collect all BAMs by sample name + .join(ch_lima_bams) // join by barcode name key + .map { key, sample, bam -> [sample, bam] } // discard the key + .groupTuple() // [Process] combine into per-patient data bamtools_merge(ch_bams_by_sample) From b9142df9b3ec2282449f340aa53231aed748a12c Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Sun, 21 Feb 2021 14:45:50 -0800 Subject: [PATCH 4/4] add the step to trim amplicon primers --- nextflow/modules/lima/{ => demux}/main.nf | 0 nextflow/modules/lima/trim_amplicons/main.nf | 34 ++++++++++++++++++++ nextflow/workflow/main.nf | 19 +++++++++-- 3 files changed, 50 insertions(+), 3 deletions(-) rename nextflow/modules/lima/{ => demux}/main.nf (100%) create mode 100644 nextflow/modules/lima/trim_amplicons/main.nf diff --git a/nextflow/modules/lima/main.nf b/nextflow/modules/lima/demux/main.nf similarity index 100% rename from nextflow/modules/lima/main.nf rename to nextflow/modules/lima/demux/main.nf diff --git a/nextflow/modules/lima/trim_amplicons/main.nf b/nextflow/modules/lima/trim_amplicons/main.nf new file mode 100644 index 0000000..51a9383 --- /dev/null +++ b/nextflow/modules/lima/trim_amplicons/main.nf @@ -0,0 +1,34 @@ +process ccs { + publishDir "${params.outdir}", mode: 'copy' + + conda 'bioconda::lima=2.0.0' + + input: + path demux_bam + path amplicon_primers_fasta + + output: + path 'output.bam', emit: bams + + shell: + def cores = 16 + if (task.cpus) { + cores = (task.cpus as int) + } + def primers_arg = "--neighbors" + if (params.lima_trim_amplicons_neighbors) primers_arg = "--neighbors" + else if (params.lima_trim_amplicons_different) primers_arg = "--different" + else log.info "[lima trim amplicons] defaulting to use --neighbors" + """ + lima \ + --num-threads ${cores} \ + -j 30 \ + --${primers_arg} \ + --ccs \ + --min-score-lead 10 \ + --min-score 80 + ${demux_bam} \ + ${amplicon_primers_fasta} \ + output.bam + """ +} \ No newline at end of file diff --git a/nextflow/workflow/main.nf b/nextflow/workflow/main.nf index f662cef..40bd7da 100644 --- a/nextflow/workflow/main.nf +++ b/nextflow/workflow/main.nf @@ -8,7 +8,8 @@ nextflow.enable.dsl=2 -----------------------------------------------------------------------------*/ include { ccs } from '../modules/ccs/main' -include { lima } from '../modules/lima/main' +include { lima_demux } from '../modules/lima/demux/main' +include { lima_trim_amplicons } from '../modules/lima/trim_amplicons/main' include { combine_demux_by_patient } from '../modules/combine_demux_by_patient/main' include { bamtools_merge } from '../modules/bamtools/merge/main' @@ -20,6 +21,14 @@ if (!params.subreads_bam) { exit 1, "[Pipeline Error] Missing parameter 'subreads_bam'\n" } +if (!params.sample_metadata) { + exit 1, "[Pipeline Error] Missing parameter 'sample_metadata'\n" +} + +if (!params.amplicon_primers_fasta) { + exit 1, "[Pipeline Error] Missing parameter 'amplicon_primers_fasta'\n" +} + if (!params.outdir) { exit 1, "[Pipeline Error] Missing parameter 'outdir'\n" } @@ -33,6 +42,10 @@ workflow { // [Channel] the input subreads BAM (eg. .subreads.bam or movie>..hifi_reads.bam) ch_in_subreads_bam = Channel.fromPath(params.subreads_bam) + // [Channel] the input amplicon primers FASTA + // Please note the requirement of having adjacent F/R pairs for the --neighbors algorithm + ch_in_amplicon_primers_fasta = Channel.fromPath(params.amplicon_primers_fasta) + // [Channel] the sample metadata. Should contain: // "Sample", "BarcodeF", "BarcodeFName", "BarcodeR", "BarcodeRName" ch_in_metadata = Channel @@ -50,7 +63,7 @@ workflow { ccs(ch_in_subreads_bam) // [Process] demultiplex the CCS reads - lima(ccs.out.subreads_bam, ch_in_barcodes_fasta) + lima_demux(ccs.out.subreads_bam, ch_in_barcodes_fasta) // [Channel] build tuples of barcode key and BAM file ch_lima_bams = demux.out.bams.flatten().map { bam -> @@ -73,7 +86,7 @@ workflow { bamtools_merge(ch_bams_by_sample) // [Process] trim amplicon primers - // TODO: support --neigbhors or --different + lima_demux(bamtools_merge.out.bam, ch_in_amplicon_primers_fasta) // [Process] variant calling // TODO: support parameterizign variant callers