diff --git a/conf/modules.config b/conf/modules.config index 87b4badb..7560d5bd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -696,7 +696,7 @@ if(params.run_crosslinking) { path: { "${params.outdir}/04_crosslinks/icountmini_summaries" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: false + enabled: params.skip_bowtie ] } diff --git a/nextflow.config b/nextflow.config index f3f020ec..fd9d8574 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,7 +53,7 @@ params { skip_fastqc = false skip_umi_extract = true skip_trimming = false - skip_transcriptome = true + // Output params save_reference = false @@ -79,6 +79,8 @@ params { extra_trimgalore_args = "--length 10" // Alignment + skip_transcriptome = true + skip_bowtie = false save_unaligned = true // Must always be true for unmapped bt to pass to star bowtie_params = "-v 2 -m 100 --norc --best --strata" star_params = "--outFilterMultimapNmax 1 --outFilterMultimapScoreRange 1 --outSAMattributes All --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterType BySJout --alignIntronMin 20 --alignIntronMax 1000000 --outFilterScoreMin 10 --alignEndsType Extend5pOfRead1 --twopassMode Basic" diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index c1a3257a..cad66fd8 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -43,6 +43,7 @@ workflow PREPARE_GENOME { regions_resolved_gtf // file: .gtf skip_filter_gtf // value: boolean skip_transcriptome // value: boolean + skip_bowtie // value: boolean main: @@ -107,18 +108,22 @@ workflow PREPARE_GENOME { // // MODULES: Uncompress Bowtie index or generate if required // + ch_bt_index = Channel.empty() - if (ncrna_genome_index) { - if (ncrna_genome_index.toString().endsWith(".tar.gz")) { - ch_bt_index = UNTAR_BT ( [ [:], ncrna_genome_index ] ).untar - ch_versions = ch_versions.mix(UNTAR_BT.out.versions) - } else { - ch_bt_index = Channel.of([ [:] , ncrna_genome_index ]) + + if (!skip_bowtie) { + if (ncrna_genome_index) { + if (ncrna_genome_index.toString().endsWith(".tar.gz")) { + ch_bt_index = UNTAR_BT ( [ [:], ncrna_genome_index ] ).untar + ch_versions = ch_versions.mix(UNTAR_BT.out.versions) + } else { + ch_bt_index = Channel.of([ [:] , ncrna_genome_index ]) + } + } + else { + ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta ).index + ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) } - } - else { - ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta ).index - ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) } // diff --git a/subworkflows/local/rna_align.nf b/subworkflows/local/rna_align.nf index a6e00c3b..ca4649ef 100644 --- a/subworkflows/local/rna_align.nf +++ b/subworkflows/local/rna_align.nf @@ -33,52 +33,77 @@ workflow RNA_ALIGN { gtf // channel: [ val(meta), [ gtf ] ] fasta // channel: [ val(meta), [ fasta/fa ] skip_transcriptome // boolean + skip_bowtie // boolean main: ch_versions = Channel.empty() // // MODULE: Align reads to ncrna genome // - BOWTIE_ALIGN ( - fastq, - bt_index, - true - ) - ch_versions = ch_versions.mix(BOWTIE_ALIGN.out.versions) - // - // SUBWORKFLOW: Sort, index BAM file - // - SAMTOOLS_SORT_NCRNA( BOWTIE_ALIGN.out.bam, fasta ) - SAMTOOLS_INDEX_NCRNA( SAMTOOLS_SORT_NCRNA.out.bam ) + unmapped_fastq = fastq + premapping_log = Channel.empty() + premapped_bam = Channel.empty() + premapped_bai = Channel.empty() + + premapped_k1_bam = Channel.empty() + premapped_k1_bai = Channel.empty() + + if (!skip_bowtie) { + BOWTIE_ALIGN ( + fastq, + bt_index, + true + ) + ch_versions = ch_versions.mix(BOWTIE_ALIGN.out.versions) + + // + // SUBWORKFLOW: Sort, index BAM file + // + SAMTOOLS_SORT_NCRNA( BOWTIE_ALIGN.out.bam ) + SAMTOOLS_INDEX_NCRNA( SAMTOOLS_SORT_NCRNA.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA.out.versions) - /* - * MODULE: Align reads to smrna genome, here allowing 100 multimappers but only reporting one alignment per multimapped read - * so that we can accurately count it in the crosslink summary later - */ + unmapped_fastq = BOWTIE_ALIGN.out.fastq + premapping_log = BOWTIE_ALIGN.out.log + premapped_bam = SAMTOOLS_SORT_NCRNA.out.bam + premapped_bai = SAMTOOLS_INDEX_NCRNA.out.bai - BOWTIE_ALIGN_K1 ( - fastq, - bt_index, - true - ) - ch_versions = ch_versions.mix(BOWTIE_ALIGN_K1.out.versions) + /* + * MODULE: Align reads to smrna genome, here allowing 100 multimappers but only reporting one alignment per multimapped read + * so that we can accurately count it in the crosslink summary later + */ + + BOWTIE_ALIGN_K1 ( + fastq, + bt_index, + true + ) + ch_versions = ch_versions.mix(BOWTIE_ALIGN_K1.out.versions) - SAMTOOLS_SORT_NCRNA_K1 ( BOWTIE_ALIGN_K1.out.bam, fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA_K1.out.versions) + SAMTOOLS_SORT_NCRNA_K1 ( BOWTIE_ALIGN_K1.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_NCRNA_K1.out.versions) + + SAMTOOLS_INDEX_NCRNA_K1 ( SAMTOOLS_SORT_NCRNA_K1.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA_K1.out.versions) + + premapped_k1_bam = SAMTOOLS_SORT_NCRNA_K1.out.bam + premapped_k1_bai = SAMTOOLS_INDEX_NCRNA_K1.out.bai + + } + - SAMTOOLS_INDEX_NCRNA_K1 ( SAMTOOLS_SORT_NCRNA_K1.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_NCRNA_K1.out.versions) + + // // MODULE: Align reads that did not align to the ncrna genome to the primary genome // if (skip_transcriptome) { STAR_ALIGN_GENOME_ONLY ( - BOWTIE_ALIGN.out.fastq, + unmapped_fastq, star_index, gtf, false, @@ -129,7 +154,7 @@ workflow RNA_ALIGN { ch_transcript_multi_bai = [] } else { STAR_ALIGN_WITH_TRANSCRIPTOME ( - BOWTIE_ALIGN.out.fastq, + unmapped_fastq, star_index, gtf, false, @@ -210,11 +235,11 @@ workflow RNA_ALIGN { emit: - ncrna_bam = SAMTOOLS_SORT_NCRNA.out.bam // channel: [ val(meta), [ bam ] ] - ncrna_bai = SAMTOOLS_INDEX_NCRNA.out.bai // channel: [ val(meta), [ bai ] ] - ncrna_log = BOWTIE_ALIGN.out.log // channel: [ val(meta), [ txt ] ] - ncrna_k1_bam = SAMTOOLS_SORT_NCRNA_K1.out.bam // channel: [ val(meta), [ bam ] ] - ncrna_k1_bai = SAMTOOLS_INDEX_NCRNA_K1.out.bai // channel: [ val(meta), [ bai ] ] + ncrna_bam = premapped_bam // channel: [ val(meta), [ bam ] ] + ncrna_bai = premapped_bai // channel: [ val(meta), [ bai ] ] + ncrna_log = premapping_log // channel: [ val(meta), [ txt ] ] + ncrna_k1_bam = premapped_k1_bam // channel: [ val(meta), [ bam ] ] + ncrna_k1_bai = premapped_k1_bai // channel: [ val(meta), [ bai ] ] genome_log = ch_genome_log // channel: [ val(meta), [ txt ] ] genome_log_final = ch_genome_log_final // channel: [ val(meta), [ txt ] ] diff --git a/tests/nf_test/test/test_only_alignment.nf.test b/tests/nf_test/test/test_only_alignment.nf.test index 396d95fb..82e4e521 100644 --- a/tests/nf_test/test/test_only_alignment.nf.test +++ b/tests/nf_test/test/test_only_alignment.nf.test @@ -54,4 +54,34 @@ nextflow_pipeline { assert !(file("$outputDir/02_alignment/genome/PHO92_A_uniqueMapped.bam.bai").exists()) } } + + test("skip_bowtie_alignment") { + tag "skip_bowtie_alignment" + tag "test" + when { + params { + outdir = "$outputDir" + skip_bowtie = true + } + } + + then { + assert workflow.success + + // NCRNA ALIGNMENT + assert !(file("$outputDir/02_alignment/ncrna").exists()) + assert !(file("$outputDir/02_alignment/ncrna/PHO92_A_ncrna.sorted.bam.bai").exists()) + assert !(file("$outputDir/02_alignment/ncrna/unmapped/PHO92_A_ncrna.unmapped.fastq.gz").exists()) + + // icountmini summaries + assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_gene.tsv").exists() + assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_subtype.tsv").exists() + assert file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_type.tsv").exists() + + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_gene_premapadjusted.tsv").exists() + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_subtype_premapadjusted.tsv").exists() + assert !file("$outputDir/04_crosslinks/icountmini_summaries/PHO92.summary_type_premapadjusted.tsv").exists() + + } + } } diff --git a/workflows/clipseq.nf b/workflows/clipseq.nf index a3078633..a1fb023c 100644 --- a/workflows/clipseq.nf +++ b/workflows/clipseq.nf @@ -228,6 +228,8 @@ workflow CLIPSEQ { ch_regions_gtf, ch_regions_filt_gtf, ch_regions_resolved_gtf, + ch_regions_resolved_gtf_genic, + params.skip_bowtie, params.skip_filter_gtf, params.skip_transcriptome ) @@ -295,7 +297,8 @@ workflow CLIPSEQ { ch_genome_index, ch_gtf, ch_fasta, - params.skip_transcriptome + params.skip_transcriptome, + params.skip_bowtie ) ch_versions = ch_versions.mix(RNA_ALIGN.out.versions) ch_ncrna_bam = RNA_ALIGN.out.ncrna_bam