From 742dc95a36ef70cb493492455fd4184259bae098 Mon Sep 17 00:00:00 2001 From: karenirawan Date: Tue, 25 Mar 2025 15:14:36 +0000 Subject: [PATCH 1/4] rna_align.nf - added line 36 skip_bowtie input as Boolean; defined unmapped fastq as input fastq line 45; define empty channel premapping log line 46; added if skip_bowtie, changed input for star align to unmapped fastq; added skip_bowtie to nextflow.config = false; added skip_bowtie param to clipseq workflow --- subworkflows/local/rna_align.nf | 79 ++++++++++++++++++++------------- workflows/clipseq.nf | 3 +- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/subworkflows/local/rna_align.nf b/subworkflows/local/rna_align.nf index a8d82e83..f5c0518c 100644 --- a/subworkflows/local/rna_align.nf +++ b/subworkflows/local/rna_align.nf @@ -33,6 +33,7 @@ workflow RNA_ALIGN { gtf // channel: [ val(meta), [ gtf ] ] fasta // channel: [ val(meta), [ fasta/fa ] skip_transcriptome // boolean + skip_bowtie // boolean main: ch_versions = Channel.empty() @@ -40,46 +41,60 @@ workflow RNA_ALIGN { // // MODULE: Align reads to ncrna genome // - BOWTIE_ALIGN ( - fastq, - bt_index, - true - ) - ch_versions = ch_versions.mix(BOWTIE_ALIGN.out.versions) - // - // SUBWORKFLOW: Sort, index BAM file - // - SAMTOOLS_SORT_NCRNA( BOWTIE_ALIGN.out.bam ) - SAMTOOLS_INDEX_NCRNA( SAMTOOLS_SORT_NCRNA.out.bam ) + unmapped_fastq = fastq + premapping_log = Channel.empty() + premapped_bam = Channel.empty() + premapped_bai = Channel.empty() - ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA.out.versions) + if (!skip_bowtie) { + BOWTIE_ALIGN ( + fastq, + bt_index, + true + ) + ch_versions = ch_versions.mix(BOWTIE_ALIGN.out.versions) - /* - * MODULE: Align reads to smrna genome, here allowing 100 multimappers but only reporting one alignment per multimapped read - * so that we can accurately count it in the crosslink summary later - */ + // + // SUBWORKFLOW: Sort, index BAM file + // + 
SAMTOOLS_SORT_NCRNA( BOWTIE_ALIGN.out.bam ) + SAMTOOLS_INDEX_NCRNA( SAMTOOLS_SORT_NCRNA.out.bam ) - BOWTIE_ALIGN_K1 ( - fastq, - bt_index, - true - ) - ch_versions = ch_versions.mix(BOWTIE_ALIGN_K1.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA.out.versions) - SAMTOOLS_SORT_NCRNA_K1 ( BOWTIE_ALIGN_K1.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA_K1.out.versions) + unmapped_fastq = BOWTIE_ALIGN.out.fastq + premapping_log = BOWTIE_ALIGN.out.log + premapped_bam = SAMTOOLS_SORT_NCRNA.out.bam + premapped_bai = SAMTOOLS_INDEX_NCRNA.out.bai + + } + + /* + * MODULE: Align reads to smrna genome, here allowing 100 multimappers but only reporting one alignment per multimapped read + * so that we can accurately count it in the crosslink summary later + */ + + BOWTIE_ALIGN_K1 ( + fastq, + bt_index, + true + ) + ch_versions = ch_versions.mix(BOWTIE_ALIGN_K1.out.versions) - SAMTOOLS_INDEX_NCRNA_K1 ( SAMTOOLS_SORT_NCRNA_K1.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA_K1.out.versions) + SAMTOOLS_SORT_NCRNA_K1 ( BOWTIE_ALIGN_K1.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA_K1.out.versions) + SAMTOOLS_INDEX_NCRNA_K1 ( SAMTOOLS_SORT_NCRNA_K1.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA_K1.out.versions) + // // MODULE: Align reads that did not align to the ncrna genome to the primary genome // if (skip_transcriptome) { STAR_ALIGN_GENOME_ONLY ( - BOWTIE_ALIGN.out.fastq, + unmapped_fastq, star_index, ch_gtf, false, @@ -130,7 +145,7 @@ workflow RNA_ALIGN { ch_transcript_multi_bai = [] } else { STAR_ALIGN_WITH_TRANSCRIPTOME ( - BOWTIE_ALIGN.out.fastq, + unmapped_fastq, star_index, ch_gtf, false, @@ -211,9 +226,9 @@ workflow RNA_ALIGN { emit: - ncrna_bam = SAMTOOLS_SORT_NCRNA.out.bam // channel: [ val(meta), [ bam ] ] - ncrna_bai = SAMTOOLS_INDEX_NCRNA.out.bai // channel: [ val(meta), [ bai ] ] - ncrna_log = BOWTIE_ALIGN.out.log // 
channel: [ val(meta), [ txt ] ] + ncrna_bam = premapped_bam // channel: [ val(meta), [ bam ] ] + ncrna_bai = premapped_bai // channel: [ val(meta), [ bai ] ] + ncrna_log = premapping_log // channel: [ val(meta), [ txt ] ] ncrna_k1_bam = SAMTOOLS_SORT_NCRNA_K1.out.bam // channel: [ val(meta), [ bam ] ] ncrna_k1_bai = SAMTOOLS_INDEX_NCRNA_K1.out.bai // channel: [ val(meta), [ bai ] ] diff --git a/workflows/clipseq.nf b/workflows/clipseq.nf index 7822b4a6..34b97e18 100644 --- a/workflows/clipseq.nf +++ b/workflows/clipseq.nf @@ -313,7 +313,8 @@ workflow CLIPSEQ { ch_genome_index, ch_filtered_gtf, ch_fasta, - params.skip_transcriptome + params.skip_transcriptome, + params.skip_bowtie ) ch_versions = ch_versions.mix(RNA_ALIGN.out.versions) ch_ncrna_bam = RNA_ALIGN.out.ncrna_bam From 310ec7edffb73f4a7bdda87fdb05b0a65ef16604 Mon Sep 17 00:00:00 2001 From: karenirawan Date: Tue, 25 Mar 2025 16:43:26 +0000 Subject: [PATCH 2/4] skip bowtie index creation, skip bowtie alignment, enable icount_summary IF bowtie alignment skipped --- conf/modules.config | 2 +- subworkflows/local/prepare_genome.nf | 25 +++++++++++++++---------- subworkflows/local/rna_align.nf | 20 +++++++++++++++----- workflows/clipseq.nf | 3 ++- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f5928fd6..7785cb2e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -724,7 +724,7 @@ if(params.run_crosslinking) { path: { "${params.outdir}/04_crosslinks/icountmini_summaries" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: false + enabled: params.skip_bowtie ] } diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 45934881..dd0e6c05 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -46,6 +46,7 @@ workflow PREPARE_GENOME { regions_filt_gtf // file: .gtf regions_resolved_gtf // file: .gtf regions_resolved_gtf_genic // file: .gtf + skip_bowtie // boolean main: @@ -111,18 +112,22 @@ workflow PREPARE_GENOME { // // MODULES: Uncompress Bowtie index or generate if required // + ch_bt_index = Channel.empty() - if (ncrna_genome_index) { - if (ncrna_genome_index.toString().endsWith(".tar.gz")) { - ch_bt_index = UNTAR_BT ( [ [:], ncrna_genome_index ] ).untar - ch_versions = ch_versions.mix(UNTAR_BT.out.versions) - } else { - ch_bt_index = Channel.of([ [:] , ncrna_genome_index ]) + + if (!skip_bowtie) { + if (ncrna_genome_index) { + if (ncrna_genome_index.toString().endsWith(".tar.gz")) { + ch_bt_index = UNTAR_BT ( [ [:], ncrna_genome_index ] ).untar + ch_versions = ch_versions.mix(UNTAR_BT.out.versions) + } else { + ch_bt_index = Channel.of([ [:] , ncrna_genome_index ]) + } + } + else { + ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta ).index + ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) } - } - else { - ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta ).index - ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) } // diff --git a/subworkflows/local/rna_align.nf b/subworkflows/local/rna_align.nf index f5c0518c..5d722322 100644 --- a/subworkflows/local/rna_align.nf +++ b/subworkflows/local/rna_align.nf @@ -47,6 +47,9 @@ workflow RNA_ALIGN { premapped_bam = Channel.empty() premapped_bai = Channel.empty() + premapped_k1_bam = Channel.empty() + premapped_k1_bai = Channel.empty() + if (!skip_bowtie) { BOWTIE_ALIGN ( fastq, @@ -69,9 +72,7 @@ workflow RNA_ALIGN { premapped_bam = SAMTOOLS_SORT_NCRNA.out.bam premapped_bai = SAMTOOLS_INDEX_NCRNA.out.bai - } - - /* + /* * MODULE: 
Align reads to smrna genome, here allowing 100 multimappers but only reporting one alignment per multimapped read * so that we can accurately count it in the crosslink summary later */ @@ -88,6 +89,15 @@ workflow RNA_ALIGN { SAMTOOLS_INDEX_NCRNA_K1 ( SAMTOOLS_SORT_NCRNA_K1.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA_K1.out.versions) + + premapped_k1_bam = SAMTOOLS_SORT_NCRNA_K1.out.bam + premapped_k1_bai = SAMTOOLS_INDEX_NCRNA_K1.out.bai + + } + + + + // // MODULE: Align reads that did not align to the ncrna genome to the primary genome @@ -229,8 +239,8 @@ workflow RNA_ALIGN { ncrna_bam = premapped_bam // channel: [ val(meta), [ bam ] ] ncrna_bai = premapped_bai // channel: [ val(meta), [ bai ] ] ncrna_log = premapping_log // channel: [ val(meta), [ txt ] ] - ncrna_k1_bam = SAMTOOLS_SORT_NCRNA_K1.out.bam // channel: [ val(meta), [ bam ] ] - ncrna_k1_bai = SAMTOOLS_INDEX_NCRNA_K1.out.bai // channel: [ val(meta), [ bai ] ] + ncrna_k1_bam = premapped_k1_bam // channel: [ val(meta), [ bam ] ] + ncrna_k1_bai = premapped_k1_bai // channel: [ val(meta), [ bai ] ] genome_log = ch_genome_log // channel: [ val(meta), [ txt ] ] genome_log_final = ch_genome_log_final // channel: [ val(meta), [ txt ] ] diff --git a/workflows/clipseq.nf b/workflows/clipseq.nf index 34b97e18..d816774a 100644 --- a/workflows/clipseq.nf +++ b/workflows/clipseq.nf @@ -243,7 +243,8 @@ workflow CLIPSEQ { ch_regions_gtf, ch_regions_filt_gtf, ch_regions_resolved_gtf, - ch_regions_resolved_gtf_genic + ch_regions_resolved_gtf_genic, + params.skip_bowtie ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_fasta = PREPARE_GENOME.out.fasta From 4cb8235ee416604fdd73f713e25a88f9dd097cce Mon Sep 17 00:00:00 2001 From: karenirawan Date: Wed, 26 Mar 2025 14:03:40 +0000 Subject: [PATCH 3/4] add skip_bowtie parameter --- nextflow.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9ad1278a..25f8fd0f 100644 --- 
a/nextflow.config +++ b/nextflow.config @@ -51,7 +51,7 @@ params { skip_fastqc = false skip_umi_extract = true skip_trimming = false - skip_transcriptome = true + // Output params save_reference = false @@ -77,6 +77,8 @@ params { extra_trimgalore_args = "--length 10" // Alignment + skip_transcriptome = true + skip_bowtie = false save_unaligned = true // Must always be true for unmapped bt to pass to star bowtie_params = "-v 2 -m 100 --norc --best --strata" star_params = "--outFilterMultimapNmax 1 --outFilterMultimapScoreRange 1 --outSAMattributes All --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterType BySJout --alignIntronMin 20 --alignIntronMax 1000000 --outFilterScoreMin 10 --alignEndsType Extend5pOfRead1 --twopassMode Basic" From 2414dfec832094a00d1452eed48cbc22b0c15668 Mon Sep 17 00:00:00 2001 From: karenirawan Date: Wed, 26 Mar 2025 14:59:03 +0000 Subject: [PATCH 4/4] added a test for skip_bowtie option --- .../nf_test/test/test_only_alignment.nf.test | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/nf_test/test/test_only_alignment.nf.test b/tests/nf_test/test/test_only_alignment.nf.test index 396d95fb..82e4e521 100644 --- a/tests/nf_test/test/test_only_alignment.nf.test +++ b/tests/nf_test/test/test_only_alignment.nf.test @@ -54,4 +54,34 @@ nextflow_pipeline { assert !(file("$outputDir/02_alignment/genome/PHO92_A_uniqueMapped.bam.bai").exists()) } } + + test("skip_bowtie_alignment") { + tag "skip_bowtie_alignment" + tag "test" + when { + params { + outdir = "$outputDir" + skip_bowtie = true + } + } + + then { + assert workflow.success + + // NCRNA ALIGNMENT + assert !(file("$outputDir/02_alignment/ncrna").exists()) + assert !(file("$outputDir/02_alignment/ncrna/PHO92_A_ncrna.sorted.bam.bai").exists()) + assert !(file("$outputDir/02_alignment/ncrna/unmapped/PHO92_A_ncrna.unmapped.fastq.gz").exists()) + + // icountmini summaries + assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_gene.tsv").exists() 
+ assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_subtype.tsv").exists() + assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_type.tsv").exists() + + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_gene_premapadjusted.tsv").exists() + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_subtype_premapadjusted.tsv").exists() + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_type_premapadjusted.tsv").exists() + + } + } }