diff --git a/assets/multiqc/merged_library_frip_score_header.txt b/assets/multiqc/merged_library_frip_score_header.txt index f89a75f6..866af81d 100644 --- a/assets/multiqc/merged_library_frip_score_header.txt +++ b/assets/multiqc/merged_library_frip_score_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_frip_score' -#section_name: 'MERGED LIB: MACS2 peak FRiP score' +#section_name: 'MERGED LIB: MACS2: Peak FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall # into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." diff --git a/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt new file mode 100644 index 00000000..6c5b15bc --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt @@ -0,0 +1,14 @@ +#id: 'mlib_joint_genrich_frip_score' +#section_name: 'MERGED LIB: Genrich (joint): Peak FRiP score' +#description: "is generated by calculating the fraction of all mapped reads that fall +# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted. +# See FRiP score." +#plot_type: 'bargraph' +#anchor: 'mlib_frip_score' +#pconfig: +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt new file mode 100644 index 00000000..e4b361d3 --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt @@ -0,0 +1,9 @@ +#id: 'mlib_joint_genrich_peak_annotation' +#section_name: 'MERGED LIB: Genrich (joint): HOMER peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# HOMER annotatePeaks.pl." +#plot_type: 'bargraph' +#anchor: 'mlib_peak_annotation' +#pconfig: +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt new file mode 100644 index 00000000..1d5e014c --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt @@ -0,0 +1,10 @@ +#id: 'mlib_joint_genrich_peak_count' +#section_name: 'MERGED LIB: Genrich (joint): Peak count' +#description: "is calculated from total number of peaks called by +# Genrich" +#plot_type: 'bargraph' +#anchor: 'mlib_peak_count' +#pconfig: +# title: 'Total peak count' +# ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc/merged_library_peak_annotation_header.txt b/assets/multiqc/merged_library_peak_annotation_header.txt index 6f668c31..f3bc0ac9 100644 --- a/assets/multiqc/merged_library_peak_annotation_header.txt +++ b/assets/multiqc/merged_library_peak_annotation_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_peak_annotation' -#section_name: 'MERGED LIB: HOMER peak annotation' +#section_name: 'MERGED LIB: MACS2: HOMER peak annotation' #description: "is generated by calculating the proportion of peaks assigned to genomic features by # HOMER annotatePeaks.pl." #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_library_peak_count_header.txt b/assets/multiqc/merged_library_peak_count_header.txt index 60f5745c..b6075a03 100644 --- a/assets/multiqc/merged_library_peak_count_header.txt +++ b/assets/multiqc/merged_library_peak_count_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_peak_count' -#section_name: 'MERGED LIB: MACS2 peak count' +#section_name: 'MERGED LIB: MACS2: Peak count' #description: "is calculated from total number of peaks called by # MACS2" #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt new file mode 100644 index 00000000..534c2373 --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt @@ -0,0 +1,14 @@ +#id: 'mlib_sep_genrich_frip_score' +#section_name: 'MERGED LIB: Genrich (sep): Peak FRiP score' +#description: "is generated by calculating the fraction of all mapped reads that fall +# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted. +# See FRiP score." +#plot_type: 'bargraph' +#anchor: 'mlib_frip_score' +#pconfig: +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt new file mode 100644 index 00000000..911e3343 --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt @@ -0,0 +1,9 @@ +#id: 'mlib_sep_genrich_peak_annotation' +#section_name: 'MERGED LIB: Genrich (sep): HOMER peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# HOMER annotatePeaks.pl." +#plot_type: 'bargraph' +#anchor: 'mlib_peak_annotation' +#pconfig: +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt new file mode 100644 index 00000000..4cac61aa --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt @@ -0,0 +1,10 @@ +#id: 'mlib_sep_genrich_peak_count' +#section_name: 'MERGED LIB: Genrich (sep): Peak count' +#description: "is calculated from total number of peaks called by +# Genrich" +#plot_type: 'bargraph' +#anchor: 'mlib_peak_count' +#pconfig: +# title: 'Total peak count' +# ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc/merged_replicate_frip_score_header.txt b/assets/multiqc/merged_replicate_frip_score_header.txt index c0f25a5b..5a17b63f 100644 --- a/assets/multiqc/merged_replicate_frip_score_header.txt +++ b/assets/multiqc/merged_replicate_frip_score_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_frip_score' -#section_name: 'MERGED REP: MACS2 peak FRiP score' +#section_name: 'MERGED REP: MACS: Peak FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall # into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." diff --git a/assets/multiqc/merged_replicate_peak_annotation_header.txt b/assets/multiqc/merged_replicate_peak_annotation_header.txt index d9d0eb03..e9e7a155 100644 --- a/assets/multiqc/merged_replicate_peak_annotation_header.txt +++ b/assets/multiqc/merged_replicate_peak_annotation_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_peak_annotation' -#section_name: 'MERGED REP: HOMER peak annotation' +#section_name: 'MERGED REP: MACS2: HOMER peak annotation' #description: "is generated by calculating the proportion of peaks assigned to genomic features by # HOMER annotatePeaks.pl." #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_replicate_peak_count_header.txt b/assets/multiqc/merged_replicate_peak_count_header.txt index e4118505..e1c7bafc 100644 --- a/assets/multiqc/merged_replicate_peak_count_header.txt +++ b/assets/multiqc/merged_replicate_peak_count_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_peak_count' -#section_name: 'MERGED REP: MACS2 Peak count' +#section_name: 'MERGED REP: MACS2: Peak count' #description: "is calculated from total number of peaks called by # MACS2" #plot_type: 'bargraph' diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f902256a..a2d9c29a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -91,8 +91,14 @@ module_order: - "./macs2/merged_replicate/featurecounts/*.summary" report_section_order: - mlib_peak_count: + mlib_sep_genrich_peak_count: before: mlib_deeptools + mlib_sep_genrich_frip_score: + before: mlib_sep_genrich_peak_count + mlib_sep_genrich_peak_annotation: + before: mlib_sep_genrich_frip_score + mlib_peak_count: + before: mlib_sep_genrich_peak_annotation mlib_frip_score: before: mlib_peak_count mlib_peak_annotation: @@ -115,6 +121,12 @@ report_section_order: before: mrep_featurecounts mrep_deseq2_clustering_1: before: mrep_deseq2_pca_1 + mlib_joint_genrich_peak_count: + before: mrep_deseq2_clustering_1 + mlib_joint_genrich_frip_score: + before: mlib_joint_genrich_peak_count + mlib_joint_genrich_peak_annotation: + before: mlib_joint_genrich_frip_score "nf-core-atacseq-methods-description": order: -1000 software_versions: diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index fc2096eb..b0e9a15a 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -90,6 +90,7 @@ for (idx in 1:length(HomerFiles)) { dist.freq[which(unique.gene.dat$Distance.to.TSS < 10000)] <- "< 10kb" dist.freq[which(unique.gene.dat$Distance.to.TSS < 5000)] <- "< 5kb" dist.freq[which(unique.gene.dat$Distance.to.TSS < 2000)] <- "< 2kb" + dist.freq[which(unique.gene.dat$Distance.to.TSS < 1000)] <- "< 1kb" dist.freq <- as.data.frame(table(dist.freq)) colnames(dist.freq) <- c("distance",sampleid) dist.melt <- melt(dist.freq) diff --git a/bin/plot_macs2_qc.r b/bin/plot_peaks_qc.r similarity index 98% rename from bin/plot_macs2_qc.r rename to bin/plot_peaks_qc.r index 5cf074de..0275a1f3 100755 --- a/bin/plot_macs2_qc.r +++ b/bin/plot_peaks_qc.r @@ -20,7 +20,7 @@ library(scales) option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"), make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string")) + make_option(c("-p", "--outprefix"), type="character", default='_peakqc', help="Output prefix", metavar="string")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) diff --git a/conf/base.config b/conf/base.config index f2ac8d7a..a6ad1a91 100755 --- a/conf/base.config +++ b/conf/base.config @@ -47,6 +47,11 @@ process { withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } } + withLabel:process_ultra { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 150.GB * task.attempt, 'memory' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } + } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } diff --git a/conf/modules.config b/conf/modules.config index 2da820b5..b38e0535 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -261,7 +261,8 @@ if (params.aligner == 'bowtie2') { ext.args = { [ meta.read_group ? "--rg-id ${meta.id} --rg SM:${meta.id - ~/_T\d+$/} --rg PL:ILLUMINA --rg LB:${meta.id} --rg PU:1" : '', - params.seq_center ? "--rg CN:${params.seq_center}" : '' + params.seq_center ? "--rg CN:${params.seq_center}" : '', + params.analyze_multimappers != 0 ? "-k ${params.analyze_multimappers}" : '' ].join(' ').trim() } ext.prefix = { "${meta.id}.Lb" } @@ -317,7 +318,8 @@ if (params.aligner == 'star') { '--readFilesCommand zcat', '--runRNGseed 0', '--outSAMattributes NH HI AS NM MD', - params.save_unaligned ? '--outReadsUnmapped Fastx' : '' + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.analyze_multimappers != 0 ? "--outFilterMultimapNmax ${params.analyze_multimappers} --winAnchorMultimapNmax ${params.analyze_multimappers * 2}" : '' ].join(' ').trim() publishDir = [ [ @@ -395,7 +397,7 @@ process { [ meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001', params.keep_dups ? '' : '-F 0x0400', - params.keep_multi_map ? '' : '-q 1' + params.keep_multi_map ? '' : '-q 1 -F 4 -F 256' ].join(' ').trim() } ext.prefix = { meta.single_end ? "${meta.id}.mLb.clN.sorted" : "${meta.id}.mLb.flT.sorted" } @@ -590,7 +592,7 @@ if (!params.skip_plot_fingerprint) { } process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' { ext.args = [ '--keep-dup all', '--nomodel', @@ -607,7 +609,7 @@ process { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:FRIP_SCORE' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -615,7 +617,7 @@ process { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mLb.clN_peaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -627,7 +629,7 @@ process { if (!params.skip_peak_annotation) { process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.mLb.clN_peaks" } publishDir = [ @@ -640,7 +642,7 @@ if (!params.skip_peak_annotation) { if (!params.skip_peak_qc) { process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' { ext.args = '-o ./ -p macs2_peak.mLb.clN' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -649,7 +651,7 @@ if (!params.skip_peak_annotation) { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' ext.prefix = 'macs2_annotatePeaks.mLb.clN' publishDir = [ @@ -719,6 +721,164 @@ if (!params.skip_peak_annotation) { } } +process { + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_SE' { + ext.args = '-n' + ext.prefix = { "${meta.id}.mLb.namesorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.{bam,bai}" + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_PE' { + ext.args = '-n' + ext.prefix = { "${meta.id}.mLb.namesorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { + ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } + ext.prefix = { "${meta.id}.mLb" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ], + [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig/scale" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ] + } + + withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}.mLb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ] + } +} + +if (!params.skip_genrich_sep) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_speak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_sPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } +} + if (!params.skip_ataqv) { process { withName: 'MERGED_LIBRARY_ATAQV_ATAQV' { @@ -816,7 +976,7 @@ if (!params.skip_merge_replicates) { } process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' { ext.args = [ '--keep-dup all', '--nomodel', @@ -831,7 +991,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:FRIP_SCORE' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -839,7 +999,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mRp.clN_peaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -851,7 +1011,7 @@ if (!params.skip_merge_replicates) { if (!params.skip_peak_annotation ) { process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.mRp.clN_peaks" } publishDir = [ @@ -864,7 +1024,7 @@ if (!params.skip_merge_replicates) { if (!params.skip_peak_qc) { process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' { ext.args = '-o ./ -p macs2_peak.mRp.clN' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -873,7 +1033,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' ext.prefix = 'macs2_annotatePeaks.mRp.clN' publishDir = [ @@ -943,6 +1103,76 @@ if (!params.skip_merge_replicates) { } } +process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_jpeak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_jPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + if (!params.skip_igv) { process { withName: 'IGV' { diff --git a/modules.json b/modules.json index 962c8dc3..c2358971 100644 --- a/modules.json +++ b/modules.json @@ -80,6 +80,11 @@ "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, + "genrich": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf index 74a7936f..b8740ed4 100644 --- a/modules/local/genome_blacklist_regions.nf +++ b/modules/local/genome_blacklist_regions.nf @@ -13,19 +13,22 @@ process GENOME_BLACKLIST_REGIONS { val keep_mito output: - path '*.bed' , emit: bed - path "versions.yml", emit: versions + path '*.whitelist_regions.bed', emit: whitelist_bed + path '*.blacklist_regions.bed', emit: blacklist_bed + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def file_out = "${sizes.simpleName}.include_regions.bed" + def whitelist = "${sizes.simpleName}.whitelist_regions.bed" + def blacklist_out = "${sizes.simpleName}.blacklist_regions.bed" def name_filter = mito_name ? "| awk '\$1 !~ /${params.mito_name}/ {print \$0}'": '' def mito_filter = keep_mito ? '' : name_filter if (blacklist) { """ - sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $file_out + sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $whitelist + complementBed -i $whitelist -g $sizes > $blacklist_out cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -34,7 +37,8 @@ process GENOME_BLACKLIST_REGIONS { """ } else { """ - awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $file_out + awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $whitelist + complementBed -i $whitelist -g $sizes > $blacklist_out cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/igv.nf b/modules/local/igv.nf index d060d0e7..be2cf8f4 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -14,6 +14,11 @@ process IGV { path ("${bigwig_replicate_publish_dir}/*") path ("${peak_replicate_publish_dir}/*") path ("${consensus_replicate_publish_dir}/*") + + path ("${bigwig_library_nf_publish_dir}/*") + path ("${peak_library_s_genrich_publish_dir}/*") + path ("${peak_library_j_genrich_publish_dir}/*") + val bigwig_library_publish_dir val peak_library_publish_dir val consensus_library_publish_dir @@ -21,6 +26,10 @@ process IGV { val peak_replicate_publish_dir val consensus_replicate_publish_dir + val bigwig_library_nf_publish_dir + val peak_library_s_genrich_publish_dir + val peak_library_j_genrich_publish_dir + output: // Publish fasta file while copyTo fails when the source and destination buckets are in different regions path "*files.txt" , emit: txt @@ -40,6 +49,9 @@ process IGV { find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_replicate_publish_dir" || test \$? = 1; } > mRp_bigwig.igv.txt find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_replicate_publish_dir" || test \$? = 1; } > mRp_peaks.igv.txt find * -type l -name "*.bed" -exec echo -e ""{}"\\t0,0,0" \\; | { grep "^$consensus_replicate_publish_dir" || test \$? = 1; } > mRp_bed.igv.txt + find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_library_nf_publish_dir" || test \$? = 1; } > mLb_nf_bigwig.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_s_genrich_publish_dir" || test \$? = 1; } > mLb_s_genrich_peaks.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_j_genrich_publish_dir" || test \$? = 1; } > mLb_j_genrich_peaks.igv.txt cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../' diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 6064fd7d..676332dd 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -40,6 +40,10 @@ process MULTIQC { path ('macs2/merged_library/annotation/*') path ('macs2/merged_library/featurecounts/*') + path ('genrich/merged_library/sep/peaks/*') + path ('genrich/merged_library/sep/peaks/*') + path ('genrich/merged_library/sep/annotation/*') + path ('alignment/merged_replicate/*') path ('alignment/merged_replicate/*') path ('alignment/merged_replicate/*') @@ -55,6 +59,10 @@ process MULTIQC { path ('deseq2_replicate/*') path ('deseq2_replicate/*') + path ('genrich/merged_library/joint/peaks/*') + path ('genrich/merged_library/joint/peaks/*') + path ('genrich/merged_library/joint/annotation/*') + output: path "*multiqc_report.html", emit: report path "*_data" , emit: data diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_peaks_qc.nf similarity index 96% rename from modules/local/plot_macs2_qc.nf rename to modules/local/plot_peaks_qc.nf index a2c39b02..d0e141d2 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_peaks_qc.nf @@ -1,4 +1,4 @@ -process PLOT_MACS2_QC { +process PLOT_PEAKS_QC { label 'process_medium' conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" @@ -22,7 +22,7 @@ process PLOT_MACS2_QC { def args = task.ext.args ?: '' def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' """ - plot_macs2_qc.r \\ + plot_peaks_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ $args diff --git a/modules/nf-core/genrich/main.nf b/modules/nf-core/genrich/main.nf new file mode 100644 index 00000000..69a6bd8c --- /dev/null +++ b/modules/nf-core/genrich/main.nf @@ -0,0 +1,69 @@ +process GENRICH { + tag "$meta.id" + label 'process_high' + + conda "bioconda::genrich=0.6.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genrich:0.6.1--h5bf99c6_1' : + 'biocontainers/genrich:0.6.1--h5bf99c6_1' }" + + input: + tuple val(meta), path(treatment_bam), path(control_bam) + path blacklist_bed + val save_pvalues + val save_pileup + val save_bed + val save_duplicates + + output: + tuple val(meta), path("*.narrowPeak") , emit: peak + tuple val(meta), path("*pvalues.bedGraph"), optional:true, emit: bedgraph_pvalues + tuple val(meta), path("*pileup.bedGraph"), optional:true, emit: bedgraph_pileup + tuple val(meta), path("*intervals.bed"), optional:true, emit: bed_intervals + tuple val(meta), path("*duplicates.txt"), optional:true, emit: duplicates + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def layout = (!args.contains("-y") && meta.single_end) ? "-y" : "" + def treatment = treatment_bam ? "-t ${treatment_bam.sort().join(',')}" : "" + def control = control_bam ? "-c ${control_bam.sort().join(',')}" : "" + def blacklist = blacklist_bed ? "-E $blacklist_bed" : "" + def pvalues = save_pvalues ? "-f ${prefix}.pvalues.bedGraph" : "" + def pileup = save_pileup ? "-k ${prefix}.pileup.bedGraph" : "" + def bed = save_bed ? "-b ${prefix}.intervals.bed" : "" + def duplicates = "" + + if (save_duplicates) { + if (args.contains('-r')) { + duplicates = "-R ${prefix}.duplicates.txt" + } else { + log.info '[Genrich] Duplicates can only be saved if they are filtered, defaulting to -r option (Remove PCR duplicates).' + duplicates = "-r -R ${prefix}.duplicates.txt" + } + } + + """ + Genrich \\ + $treatment \\ + $args \\ + $layout \\ + $blacklist \\ + -o ${prefix}.narrowPeak \\ + $pvalues \\ + $pileup \\ + $bed \\ + $duplicates \\ + $control + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genrich: \$(echo \$(Genrich --version 2>&1) | sed 's/^Genrich, version //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/genrich/meta.yml b/modules/nf-core/genrich/meta.yml new file mode 100644 index 00000000..414821f6 --- /dev/null +++ b/modules/nf-core/genrich/meta.yml @@ -0,0 +1,80 @@ +name: genrich +description: Peak-calling for ChIP-seq and ATAC-seq enrichment experiments +keywords: + - peak-calling + - ChIP-seq + - ATAC-seq +tools: + - genrich: + description: | + Genrich is a peak-caller for genomic enrichment assays (e.g. ChIP-seq, ATAC-seq). + It analyzes alignment files generated following the assay and produces a file + detailing peaks of significant enrichment. + homepage: https://github.com/jsh58/Genrich + documentation: https://github.com/jsh58/Genrich#readme + tool_dev_url: https://github.com/jsh58/Genrich + + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - treatment_bam: + type: file + description: Coordinate sorted BAM/SAM file from treatment sample or list of BAM/SAM files from biological replicates + pattern: "*.{bam,sam}" + - control_bam: + type: file + description: Coordinate sorted BAM/SAM file from control sample or list of BAM/SAM files from control samples + pattern: "*.{bam,sam}" + - blacklist_bed: + type: file + description: Bed file containing genomic intervals to exclude from the analysis + pattern: "*.{bed}" + - save_pvalues: + type: boolean + description: Create bedgraph-ish file for p/q-values file + - save_pileup: + type: boolean + description: Create bedgraph-ish file for pileups and p-values + - save_bed: + type: boolean + description: Create BED file for reads/fragments/intervals + - save_duplicates: + type: boolean + description: Create PCR duplicates file (only works if -r option is set) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - peaks: + type: file + description: Output file is in ENCODE narrowPeak format + pattern: "*.{narrowPeak}" + - bedgraph_pvalues: + type: file + description: bedGraph file containing p/q values + pattern: "*.{pvalues.bedGraph}" + - bedgraph_pileup: + type: file + description: bedGraph file containing pileups and p-values + pattern: "*.{pileup.bedGraph}" + - bed_intervals: + type: file + description: Bed file containing annotated intervals + pattern: "*.{intervals.bed}" + - duplicates: + type: file + description: Text output file containing intervals corresponding to PCR duplicates + pattern: "*.{intervals.txt}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@JoseEspinosa" + - "@samuelruizperez" diff --git a/nextflow.config b/nextflow.config index 9f60b156..62a5c420 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,18 +43,31 @@ params { skip_merge_replicates = false save_align_intermeds = false save_unaligned = false + analyze_multimappers = 0 + // Options: Peaks narrow_peak = false + skip_peak_qc = false + skip_peak_annotation = false + + // Options: MACS2 broad_cutoff = 0.1 macs_fdr = null macs_pvalue = null min_reps_consensus = 1 save_macs_pileup = false - skip_peak_qc = false - skip_peak_annotation = false skip_consensus_peaks = false + // Options: Genrich + skip_genrich_sep = true + genrich_qvalue = 0.01 + genrich_pvalue = 0.01 + save_genrich_pileup = true + save_genrich_pvalues = true + save_genrich_bed = true + save_genrich_duplicates = true + // Options: DESeq2 QC deseq2_vst = true skip_deseq2_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 2e384c9e..9402a7d4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -325,6 +325,13 @@ "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", "fa_icon": "fas fa-cog" + }, + "analyze_multimappers": { + "type": "integer", + "default": 0, + "description": "Output this number of multimapping reads from the alignment step and include them in peak calling.", + "help_text": "This will include non-unique reads in the alignment (Bowtie2) and peak calling steps (Genrich). Default of 0 means only the best alignment is used and multimappers are ignored.", + "fa_icon": "fas fa-chart-bar" } } }, @@ -382,6 +389,43 @@ "type": "boolean", "description": "Skip consensus peak generation, annotation and counting.", "fa_icon": "fas fa-fast-forward" + }, + "skip_genrich_sep": { + "type": "boolean", + "description": "Skip Genrich individual (separate replicates) peak calling. If skipped, each group of biological replicates will be analysed together, which is the recommended approach.", + "fa_icon": "fas fa-fast-forward" + }, + "save_genrich_pileup": { + "type": "boolean", + "description": "Instruct Genrich to create bedGraph files normalised to signal per million reads.", + "fa_icon": "fas fa-save" + }, + "save_genrich_pvalues": { + "type": "boolean", + "description": "Instruct Genrich to create bedGraph files with p-values.", + "fa_icon": "fas fa-save" + }, + "save_genrich_bed": { + "type": "boolean", + "description": "Instruct Genrich to create BED files.", + "fa_icon": "fas fa-save" + }, + "save_genrich_duplicates": { + "type": "boolean", + "description": "Instruct Genrich to save duplicates.", + "fa_icon": "fas fa-save" + }, + "genrich_qvalue": { + "type": "number", + "default": 0.1, + "description": "Minimum q-value cutoff for peak detection.", + "fa_icon": "fas fa-sort-amount-down" + }, + "genrich_pvalue": { + "type": "number", + "default": 0.1, + "description": "Minimum p-value cutoff for peak detection.", + "fa_icon": "fas fa-sort-amount-down" } } }, diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf new file mode 100644 index 00000000..31bf8521 --- /dev/null +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -0,0 +1,75 @@ +include { SAMTOOLS_SORT as SAMTOOLS_SORT_SE } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_PE } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' + +workflow BAM_NOFILTER_BAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + ch_bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + } + .set { ch_bam_b } + + // + // Index SE BAM file + // + SAMTOOLS_INDEX { + ch_bam_b.single_end + } + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // + // Name sort SE BAM + // + SAMTOOLS_SORT_SE ( + ch_bam_b.single_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_SE.out.versions.first()) + + // + // Run samtools stats, flagstat and idxstats on SE BAM + // + BAM_STATS_SAMTOOLS ( + ch_bam_b.single_end.join(SAMTOOLS_INDEX.out.bai), + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) + + // + // Name sort PE BAM + // + SAMTOOLS_SORT_PE ( + ch_bam_b.paired_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_PE.out.versions.first()) + + // + // Sort, index PE BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + SAMTOOLS_SORT_PE.out.bam, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) + + emit: + name_bam = SAMTOOLS_SORT_PE.out.bam.mix(SAMTOOLS_SORT_SE.out.bam) // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats.mix(BAM_STATS_SAMTOOLS.out.idxstats) // channel: [ val(meta), [ idxstats ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf new file mode 100644 index 00000000..e162f628 --- /dev/null +++ b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf @@ -0,0 +1,183 @@ +// +// Call peaks with Genrich, annotate with HOMER and perform downstream QC +// + +include { GENRICH } from '../../modules/nf-core/genrich/main' +include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' + +include { FRIP_SCORE } from '../../modules/local/frip_score' +include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' + +include { PLOT_PEAKS_QC as PLOT_GENRICH_QC } from '../../modules/local/plot_peaks_qc' + +include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' + +workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { + take: + ch_bam // channel: [ val(meta), [ treatment_bam ], [ control_bam ] ] + ch_fasta // channel: [ fasta ] + ch_gtf // channel: [ gtf ] + genome // string: genome name + ch_blacklist_regions // channel: [ bed ] + annotate_peaks_suffix // string: suffix for input HOMER annotate peaks files to be trimmed off + ch_peak_count_header_multiqc // channel: [ header_file ] + ch_frip_score_multiqc // channel: [ header_file ] + ch_peak_annotation_header_multiqc // channel: [ header_file ] +// add broad_peak options + is_narrow_peak // boolean: true/false + skip_peak_annotation // boolean: true/false + skip_peak_qc // boolean: true/false + + save_pvalues // boolean: true/false + save_pileup // boolean: true/false + save_bed // boolean: true/false + save_duplicates // boolean: true/false + + main: + + ch_versions = Channel.empty() + + // + // Call peaks with Genrich + // + GENRICH ( + ch_bam, + ch_blacklist_regions, + save_pvalues, + save_pileup, + save_bed, + save_duplicates + + ) + ch_versions = ch_versions.mix(GENRICH.out.versions.first()) + + // + // Filter out samples with 0 Genrich peaks called + // + GENRICH + .out + .peak + .filter { + meta, peaks -> + peaks.size() > 0 + } + .set { ch_genrich_peaks } + + + // Create channels: [ meta, ip_bam, peaks ] + + ch_bam + .join(ch_genrich_peaks, by: [0]) + .map { + meta, treatment_bam, control_bam, peaks -> + [ meta, treatment_bam, peaks ] + } + .set { ch_bam_peaks } + + // split ch_bam_peaks by treatment_bam + + ch_bam_peaks + .transpose() + .map { + meta, treatment_bam, peaks -> + def meta_clone = meta.clone() + meta_clone.id = treatment_bam.getSimpleName() + [ meta_clone, treatment_bam, peaks ] + } + .set { ch_bam_peaks_treatment_bam } + + // + // Calculate FRiP score + // + FRIP_SCORE ( + ch_bam_peaks_treatment_bam + ) + ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) + + // Create channels: [ meta, peaks, frip ] + ch_bam_peaks_treatment_bam + .join(FRIP_SCORE.out.txt, by: [0]) + .map { + meta, ip_bam, peaks, frip -> + [ meta, peaks, frip ] + } + .set { ch_bam_peak_frip } + + // + // FRiP score custom content for MultiQC + // + MULTIQC_CUSTOM_PEAKS ( + ch_bam_peak_frip, + ch_peak_count_header_multiqc, + ch_frip_score_multiqc + ) + ch_versions = ch_versions.mix(MULTIQC_CUSTOM_PEAKS.out.versions.first()) + + ch_homer_annotatepeaks = Channel.empty() + ch_homer_det_annotatepeaks = Channel.empty() + ch_plot_genrich_qc_txt = Channel.empty() + ch_plot_genrich_qc_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_txt = Channel.empty() + ch_plot_homer_annotatepeaks_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_tsv = Channel.empty() + if (!skip_peak_annotation) { + // + // Annotate peaks with HOMER + // + HOMER_ANNOTATEPEAKS ( + ch_genrich_peaks, + ch_fasta, + ch_gtf + ) + ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) + + if (!skip_peak_qc) { + // + // Genrich QC plots with R + // + PLOT_GENRICH_QC ( + ch_genrich_peaks.collect{it[1]}, + is_narrow_peak + ) + ch_plot_genrich_qc_txt = PLOT_GENRICH_QC.out.txt + ch_plot_genrich_qc_pdf = PLOT_GENRICH_QC.out.pdf + ch_versions = ch_versions.mix(PLOT_GENRICH_QC.out.versions) + + // + // Peak annotation QC plots with R + // + PLOT_HOMER_ANNOTATEPEAKS ( + HOMER_ANNOTATEPEAKS.out.txt.collect{it[1]}, + ch_peak_annotation_header_multiqc, + annotate_peaks_suffix + ) + ch_plot_homer_annotatepeaks_txt = PLOT_HOMER_ANNOTATEPEAKS.out.txt + ch_plot_homer_annotatepeaks_pdf = PLOT_HOMER_ANNOTATEPEAKS.out.pdf + ch_plot_homer_annotatepeaks_tsv = PLOT_HOMER_ANNOTATEPEAKS.out.tsv + ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions) + } + } + + emit: + peaks = ch_genrich_peaks // channel: [ val(meta), [ peaks ] ] + bed_intervals = GENRICH.out.bed_intervals // channel: [ val(meta), [ bed ] ] + bedgraph_pileup = GENRICH.out.bedgraph_pileup // channel: [ val(meta), [ bedgraph ] ] + bedgraph_pvalues = GENRICH.out.bedgraph_pvalues // channel: [ val(meta), [ bedgraph ] ] + duplicates = GENRICH.out.duplicates // channel: [ val(meta), [ bedgraph ] ] + + frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] + + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] + peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] + + homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ] + plot_genrich_qc_txt = ch_plot_genrich_qc_txt // channel: [ txt ] + plot_genrich_qc_pdf = ch_plot_genrich_qc_pdf // channel: [ pdf ] + + plot_homer_annotatepeaks_txt = ch_plot_homer_annotatepeaks_txt // channel: [ txt ] + plot_homer_annotatepeaks_pdf = ch_plot_homer_annotatepeaks_pdf // channel: [ pdf ] + plot_homer_annotatepeaks_tsv = ch_plot_homer_annotatepeaks_tsv // channel: [ tsv ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index 4c2a8710..42d3195e 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -7,7 +7,7 @@ include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatep include { FRIP_SCORE } from '../../modules/local/frip_score' include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' -include { PLOT_MACS2_QC } from '../../modules/local/plot_macs2_qc' +include { PLOT_PEAKS_QC as PLOT_MACS2_QC } from '../../modules/local/plot_peaks_qc' include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { @@ -23,7 +23,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { is_narrow_peak // boolean: true/false skip_peak_annotation // boolean: true/false skip_peak_qc // boolean: true/false - + main: ch_versions = Channel.empty() @@ -43,7 +43,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { MACS2_CALLPEAK .out .peak - .filter { + .filter { meta, peaks -> peaks.size() > 0 } @@ -102,7 +102,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ) ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) - + if (!skip_peak_qc) { // // MACS2 QC plots with R @@ -138,7 +138,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { bedgraph = MACS2_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ] frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] - + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index d6dbffcf..cd54d112 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -143,13 +143,15 @@ workflow PREPARE_GENOME { // Prepare genome intervals for filtering by removing regions in blacklist file // ch_genome_filtered_bed = Channel.empty() + ch_genome_blacklist_bed = Channel.empty() GENOME_BLACKLIST_REGIONS ( ch_chrom_sizes, ch_blacklist.ifEmpty([]), params.mito_name ?: '', params.keep_mito ) - ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed + ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.whitelist_bed + ch_genome_blacklist_bed = GENOME_BLACKLIST_REGIONS.out.blacklist_bed ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) // @@ -244,7 +246,8 @@ workflow PREPARE_GENOME { gene_bed = ch_gene_bed // path: gene.bed tss_bed = ch_tss_bed // path: tss.bed chrom_sizes = ch_chrom_sizes // path: genome.sizes - filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed + filtered_bed = ch_genome_filtered_bed // path: *.whitelist_regions.bed + blacklist_bed = ch_genome_blacklist_bed // path: *.blacklist_regions.bed bwa_index = ch_bwa_index // path: bwa/index/ bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ chromap_index = ch_chromap_index // path: genome.index diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index 5daf5839..9796a5e0 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -43,8 +43,19 @@ ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config) // Header files for MultiQC ch_multiqc_merged_library_peak_count_header = file("$projectDir/assets/multiqc/merged_library_peak_count_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt", checkIfExists: true) + + ch_multiqc_merged_library_frip_score_header = file("$projectDir/assets/multiqc/merged_library_frip_score_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt", checkIfExists: true) + ch_multiqc_merged_library_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_peak_annotation_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt", checkIfExists: true) + + ch_multiqc_merged_library_deseq2_pca_header = file("$projectDir/assets/multiqc/merged_library_deseq2_pca_header.txt", checkIfExists: true) ch_multiqc_merged_library_deseq2_clustering_header = file("$projectDir/assets/multiqc/merged_library_deseq2_clustering_header.txt", checkIfExists: true) @@ -71,14 +82,21 @@ include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { ALIGN_STAR } from '../subworkflows/local/align_star' include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools' include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools' +include { BAM_NOFILTER_BAMTOOLS as MERGED_LIBRARY_NOFILTER_BAM } from '../subworkflows/local/bam_nofilter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' +include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_NF_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_REPLICATE_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_LIBRARY_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_REPLICATE_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf' + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -157,10 +175,20 @@ workflow ATACSEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + // + // Check if analyze_multimappers is set + // + if (params.analyze_multimappers != 0) { + if (params.aligner == 'bwa' || params.aligner == 'chromap') { + exit 1, 'Multimapping read analysis is so far only supported for Bowtie2 and STAR. Remove the --analyze_multimappers parameter or change --aligner to bowtie2 or star.' + } + } + // // SUBWORKFLOW: Alignment with BWA & BAM QC // ch_genome_bam = Channel.empty() + ch_genome_bam_index = Channel.empty() ch_samtools_stats = Channel.empty() ch_samtools_flagstat = Channel.empty() ch_samtools_idxstats = Channel.empty() @@ -175,6 +203,7 @@ workflow ATACSEQ { } ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai ch_samtools_stats = FASTQ_ALIGN_BWA.out.stats ch_samtools_flagstat = FASTQ_ALIGN_BWA.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats @@ -196,6 +225,7 @@ workflow ATACSEQ { } ) ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai ch_samtools_stats = FASTQ_ALIGN_BOWTIE2.out.stats ch_samtools_flagstat = FASTQ_ALIGN_BOWTIE2.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_BOWTIE2.out.idxstats @@ -219,6 +249,7 @@ workflow ATACSEQ { [] ) ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam + ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats @@ -240,6 +271,7 @@ workflow ATACSEQ { params.seq_center ?: '' ) ch_genome_bam = ALIGN_STAR.out.bam + ch_genome_bam_index = ALIGN_STAR.out.bai ch_samtools_stats = ALIGN_STAR.out.stats ch_samtools_flagstat = ALIGN_STAR.out.flagstat ch_samtools_idxstats = ALIGN_STAR.out.idxstats @@ -293,17 +325,7 @@ workflow ATACSEQ { // SUBWORKFLOW: Filter BAM file // MERGED_LIBRARY_FILTER_BAM ( - MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - }, + MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam.join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]), PREPARE_GENOME.out.filtered_bed.first(), PREPARE_GENOME .out @@ -384,16 +406,7 @@ workflow ATACSEQ { MERGED_LIBRARY_FILTER_BAM .out .bam - .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_FILTER_BAM.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } + .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0]) .set { ch_bam_bai } if (params.with_control) { @@ -446,7 +459,7 @@ workflow ATACSEQ { // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC // - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS ( + MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 ( ch_bam_library, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -459,7 +472,12 @@ workflow ATACSEQ { params.skip_peak_annotation, params.skip_peak_qc ) - ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.versions) + ch_library_peaks = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peaks + ch_library_frip_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc + ch_library_peak_count_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc + ch_library_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv + + ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.versions) // // SUBWORKFLOW: Consensus peaks analysis @@ -470,7 +488,7 @@ workflow ATACSEQ { ch_deseq2_clustering_library_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { MERGED_LIBRARY_CONSENSUS_PEAKS ( - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks, + ch_library_peaks, ch_bam_library, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -487,21 +505,107 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_CONSENSUS_PEAKS.out.versions) } + // + // SUBWORKFLOW: Sort by name and generate stats for unfiltered BAMs + // + MERGED_LIBRARY_NOFILTER_BAM ( + PICARD_MERGESAMFILES_LIBRARY.out.bam, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + } + ) + ch_bam = MERGED_LIBRARY_NOFILTER_BAM.out.bam + ch_name_bam = MERGED_LIBRARY_NOFILTER_BAM.out.name_bam + ch_versions = ch_versions.mix(MERGED_LIBRARY_NOFILTER_BAM.out.versions.first()) + + // + // SUBWORKFLOW: Normalised bigWig coverage tracks + // + MERGED_LIBRARY_NF_BAM_TO_BIGWIG ( + ch_bam.join(MERGED_LIBRARY_NOFILTER_BAM.out.flagstat, by: [0]), + PREPARE_GENOME.out.chrom_sizes + ) + ch_versions = ch_versions.mix(MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.versions) + + + // Create channels: [ meta, [bam] ] or [ meta, [ bam, control_bam ] ] (now for Genrich) + if (params.with_control) { + ch_name_bam + .map { + meta, bam -> + meta.control ? null : [ meta.id, [ bam ] ] + } + .set { ch_control_bam } + + ch_name_bam + .map { + meta, bam -> + meta.control ? [ meta.control, meta, [ bam ] ] : null + } + .combine(ch_control_bam, by: 0) + .map { it -> [ it[1] , it[2] + it[4] ] } + .set { ch_name_bam } + } + + // Create channel: [ val(meta), bam, control_bam ] (now for Genrich) + if (params.with_control) { + ch_name_bam + .map { + meta, bams -> + [ meta , bams[0], bams[1] ] + } + .set { ch_merged_library_bams_sep } + } else { + ch_name_bam + .map { + meta, bam -> + [ meta , bam, [] ] + } + .set { ch_merged_library_bams_sep } + } + + // + // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC + // + ch_library_genrich_sep_peaks = Channel.empty() + ch_library_genrich_sep_frip_multiqc = Channel.empty() + ch_library_genrich_sep_peak_count_multiqc = Channel.empty() + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = Channel.empty() + if (!params.skip_genrich_sep) { + MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH ( + ch_merged_library_bams_sep, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + params.genome, + PREPARE_GENOME.out.blacklist_bed.first(), + ".mLb.genrich.speaks.annotatePeaks.txt", + ch_multiqc_merged_library_sep_genrich_peak_count_header, + ch_multiqc_merged_library_sep_genrich_frip_score_header, + ch_multiqc_merged_library_sep_genrich_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc, + params.save_genrich_pvalues, + params.save_genrich_pileup, + params.save_genrich_bed, + params.save_genrich_duplicates + ) + ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks + ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc + ch_library_genrich_sep_peak_count_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv + ch_versions = ch_versions.mix(MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + } + // Create channels: [ meta, bam, bai, peak_file ] MERGED_LIBRARY_MARKDUPLICATES_PICARD .out .bam - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } - .join(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks, by: [0]) + .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]) + .join(ch_library_peaks, by: [0]) .set { ch_bam_peaks } // @@ -635,7 +739,8 @@ workflow ATACSEQ { // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC // - MERGED_REPLICATE_CALL_ANNOTATE_PEAKS ( + + MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 ( ch_bam_replicate, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -648,18 +753,18 @@ workflow ATACSEQ { params.skip_peak_annotation, params.skip_peak_qc ) - ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks - ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.frip_multiqc - ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc - ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv - ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.versions) + ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks + ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc + ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc + ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv + ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.versions) // // SUBWORKFLOW: Consensus peaks analysis // if (!params.skip_consensus_peaks) { MERGED_REPLICATE_CONSENSUS_PEAKS ( - MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks, + MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks, ch_merged_library_replicate_bam, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -677,6 +782,50 @@ workflow ATACSEQ { } } + // Check if we have multiple replicates (now for Genrich) + ch_merged_library_bams_sep + .map { + meta, bam, control_bam -> + def meta_clone = meta.clone() + meta_clone.id = meta_clone.id - ~/_REP\d+$/ + meta_clone.control = meta_clone.control ? meta_clone.control - ~/_REP\d+$/ : "" + [ meta_clone.id, meta_clone, bam, control_bam ] + } + .groupTuple() + .map { + id, metas, bams, control_bams -> + [ metas[0], bams.flatten(), control_bams.flatten() ] + } + .set { ch_merged_library_bams_joint } + + // + // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC + // + MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH ( + ch_merged_library_bams_joint, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + params.genome, + PREPARE_GENOME.out.blacklist_bed.first(), + ".mLb.genrich.jpeaks.annotatePeaks.txt", + ch_multiqc_merged_library_joint_genrich_peak_count_header, + ch_multiqc_merged_library_joint_genrich_frip_score_header, + ch_multiqc_merged_library_joint_genrich_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc, + params.save_genrich_pvalues, + params.save_genrich_pileup, + params.save_genrich_bed, + params.save_genrich_duplicates + ) + ch_library_genrich_joint_peaks = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks + ch_library_genrich_joint_frip_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc + ch_library_genrich_joint_peak_count_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc + ch_library_genrich_joint_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv + + ch_versions = ch_versions.mix(MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + // // MODULE: Create IGV session // @@ -685,11 +834,16 @@ workflow ATACSEQ { PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.fai, MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]), + ch_library_peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]), ch_ucsc_bedgraphtobigwig_replicate_bigwig.collect{it[1]}.ifEmpty([]), ch_macs2_replicate_peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_replicate_bed.collect{it[1]}.ifEmpty([]), + + MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_peaks.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_peaks.collect{it[1]}.ifEmpty([]), + "${params.aligner}/merged_library/bigwig", { ["${params.aligner}/merged_library/macs2", params.narrow_peak? '/narrow_peak' : '/broad_peak' @@ -706,6 +860,15 @@ workflow ATACSEQ { params.narrow_peak? '/narrow_peak' : '/broad_peak', "/consensus" ].join('') }, + + "${params.aligner}/merged_library/genrich/bigwig", + { ["${params.aligner}/merged_library/genrich/sep", + params.narrow_peak? '/narrow_peak' : '/broad_peak' + ].join('') }, + { ["${params.aligner}/merged_library/genrich/joint", + params.narrow_peak? '/narrow_peak' : '/broad_peak' + ].join('') }, + ) ch_versions = ch_versions.mix(IGV.out.versions) } @@ -756,11 +919,15 @@ workflow ATACSEQ { ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.frip_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), + ch_library_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), ch_featurecounts_library_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), + ch_markduplicates_replicate_stats.collect{it[1]}.ifEmpty([]), ch_markduplicates_replicate_flagstat.collect{it[1]}.ifEmpty([]), ch_markduplicates_replicate_idxstats.collect{it[1]}.ifEmpty([]), @@ -774,7 +941,11 @@ workflow ATACSEQ { ch_deseq2_pca_library_multiqc.collect().ifEmpty([]), ch_deseq2_clustering_library_multiqc.collect().ifEmpty([]), ch_deseq2_pca_replicate_multiqc.collect().ifEmpty([]), - ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([]) + ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([]), + + ch_library_genrich_joint_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() }