diff --git a/assets/multiqc/merged_library_frip_score_header.txt b/assets/multiqc/merged_library_frip_score_header.txt
index f89a75f6..866af81d 100644
--- a/assets/multiqc/merged_library_frip_score_header.txt
+++ b/assets/multiqc/merged_library_frip_score_header.txt
@@ -1,5 +1,5 @@
#id: 'mlib_frip_score'
-#section_name: 'MERGED LIB: MACS2 peak FRiP score'
+#section_name: 'MERGED LIB: MACS2: Peak FRiP score'
#description: "is generated by calculating the fraction of all mapped reads that fall
# into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted.
# See FRiP score."
diff --git a/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt
new file mode 100644
index 00000000..6c5b15bc
--- /dev/null
+++ b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt
@@ -0,0 +1,14 @@
+#id: 'mlib_joint_genrich_frip_score'
+#section_name: 'MERGED LIB: Genrich (joint): Peak FRiP score'
+#description: "is generated by calculating the fraction of all mapped reads that fall
+# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted.
+# See FRiP score."
+#plot_type: 'bargraph'
+#anchor: 'mlib_frip_score'
+#pconfig:
+# title: 'FRiP score'
+# ylab: 'FRiP score'
+# ymax: 1
+# ymin: 0
+# tt_decimals: 2
+# cpswitch: False
diff --git a/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt
new file mode 100644
index 00000000..e4b361d3
--- /dev/null
+++ b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt
@@ -0,0 +1,9 @@
+#id: 'mlib_joint_genrich_peak_annotation'
+#section_name: 'MERGED LIB: Genrich (joint): HOMER peak annotation'
+#description: "is generated by calculating the proportion of peaks assigned to genomic features by
+# HOMER annotatePeaks.pl."
+#plot_type: 'bargraph'
+#anchor: 'mlib_peak_annotation'
+#pconfig:
+# title: 'Peak to feature proportion'
+# ylab: 'Peak count'
diff --git a/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt
new file mode 100644
index 00000000..1d5e014c
--- /dev/null
+++ b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt
@@ -0,0 +1,10 @@
+#id: 'mlib_joint_genrich_peak_count'
+#section_name: 'MERGED LIB: Genrich (joint): Peak count'
+#description: "is calculated from total number of peaks called by
+# Genrich"
+#plot_type: 'bargraph'
+#anchor: 'mlib_peak_count'
+#pconfig:
+# title: 'Total peak count'
+# ylab: 'Peak count'
+# cpswitch: False
diff --git a/assets/multiqc/merged_library_peak_annotation_header.txt b/assets/multiqc/merged_library_peak_annotation_header.txt
index 6f668c31..f3bc0ac9 100644
--- a/assets/multiqc/merged_library_peak_annotation_header.txt
+++ b/assets/multiqc/merged_library_peak_annotation_header.txt
@@ -1,5 +1,5 @@
#id: 'mlib_peak_annotation'
-#section_name: 'MERGED LIB: HOMER peak annotation'
+#section_name: 'MERGED LIB: MACS2: HOMER peak annotation'
#description: "is generated by calculating the proportion of peaks assigned to genomic features by
# HOMER annotatePeaks.pl."
#plot_type: 'bargraph'
diff --git a/assets/multiqc/merged_library_peak_count_header.txt b/assets/multiqc/merged_library_peak_count_header.txt
index 60f5745c..b6075a03 100644
--- a/assets/multiqc/merged_library_peak_count_header.txt
+++ b/assets/multiqc/merged_library_peak_count_header.txt
@@ -1,5 +1,5 @@
#id: 'mlib_peak_count'
-#section_name: 'MERGED LIB: MACS2 peak count'
+#section_name: 'MERGED LIB: MACS2: Peak count'
#description: "is calculated from total number of peaks called by
# MACS2"
#plot_type: 'bargraph'
diff --git a/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt
new file mode 100644
index 00000000..534c2373
--- /dev/null
+++ b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt
@@ -0,0 +1,14 @@
+#id: 'mlib_sep_genrich_frip_score'
+#section_name: 'MERGED LIB: Genrich (sep): Peak FRiP score'
+#description: "is generated by calculating the fraction of all mapped reads that fall
+# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted.
+# See FRiP score."
+#plot_type: 'bargraph'
+#anchor: 'mlib_frip_score'
+#pconfig:
+# title: 'FRiP score'
+# ylab: 'FRiP score'
+# ymax: 1
+# ymin: 0
+# tt_decimals: 2
+# cpswitch: False
diff --git a/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt
new file mode 100644
index 00000000..911e3343
--- /dev/null
+++ b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt
@@ -0,0 +1,9 @@
+#id: 'mlib_sep_genrich_peak_annotation'
+#section_name: 'MERGED LIB: Genrich (sep): HOMER peak annotation'
+#description: "is generated by calculating the proportion of peaks assigned to genomic features by
+# HOMER annotatePeaks.pl."
+#plot_type: 'bargraph'
+#anchor: 'mlib_peak_annotation'
+#pconfig:
+# title: 'Peak to feature proportion'
+# ylab: 'Peak count'
diff --git a/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt
new file mode 100644
index 00000000..4cac61aa
--- /dev/null
+++ b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt
@@ -0,0 +1,10 @@
+#id: 'mlib_sep_genrich_peak_count'
+#section_name: 'MERGED LIB: Genrich (sep): Peak count'
+#description: "is calculated from total number of peaks called by
+# Genrich"
+#plot_type: 'bargraph'
+#anchor: 'mlib_peak_count'
+#pconfig:
+# title: 'Total peak count'
+# ylab: 'Peak count'
+# cpswitch: False
diff --git a/assets/multiqc/merged_replicate_frip_score_header.txt b/assets/multiqc/merged_replicate_frip_score_header.txt
index c0f25a5b..5a17b63f 100644
--- a/assets/multiqc/merged_replicate_frip_score_header.txt
+++ b/assets/multiqc/merged_replicate_frip_score_header.txt
@@ -1,5 +1,5 @@
#id: 'mrep_frip_score'
-#section_name: 'MERGED REP: MACS2 peak FRiP score'
+#section_name: 'MERGED REP: MACS: Peak FRiP score'
#description: "is generated by calculating the fraction of all mapped reads that fall
# into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted.
# See FRiP score."
diff --git a/assets/multiqc/merged_replicate_peak_annotation_header.txt b/assets/multiqc/merged_replicate_peak_annotation_header.txt
index d9d0eb03..e9e7a155 100644
--- a/assets/multiqc/merged_replicate_peak_annotation_header.txt
+++ b/assets/multiqc/merged_replicate_peak_annotation_header.txt
@@ -1,5 +1,5 @@
#id: 'mrep_peak_annotation'
-#section_name: 'MERGED REP: HOMER peak annotation'
+#section_name: 'MERGED REP: MACS2: HOMER peak annotation'
#description: "is generated by calculating the proportion of peaks assigned to genomic features by
# HOMER annotatePeaks.pl."
#plot_type: 'bargraph'
diff --git a/assets/multiqc/merged_replicate_peak_count_header.txt b/assets/multiqc/merged_replicate_peak_count_header.txt
index e4118505..e1c7bafc 100644
--- a/assets/multiqc/merged_replicate_peak_count_header.txt
+++ b/assets/multiqc/merged_replicate_peak_count_header.txt
@@ -1,5 +1,5 @@
#id: 'mrep_peak_count'
-#section_name: 'MERGED REP: MACS2 Peak count'
+#section_name: 'MERGED REP: MACS2: Peak count'
#description: "is calculated from total number of peaks called by
# MACS2"
#plot_type: 'bargraph'
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index f902256a..a2d9c29a 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -91,8 +91,14 @@ module_order:
- "./macs2/merged_replicate/featurecounts/*.summary"
report_section_order:
- mlib_peak_count:
+ mlib_sep_genrich_peak_count:
before: mlib_deeptools
+ mlib_sep_genrich_frip_score:
+ before: mlib_sep_genrich_peak_count
+ mlib_sep_genrich_peak_annotation:
+ before: mlib_sep_genrich_frip_score
+ mlib_peak_count:
+ before: mlib_sep_genrich_peak_annotation
mlib_frip_score:
before: mlib_peak_count
mlib_peak_annotation:
@@ -115,6 +121,12 @@ report_section_order:
before: mrep_featurecounts
mrep_deseq2_clustering_1:
before: mrep_deseq2_pca_1
+ mlib_joint_genrich_peak_count:
+ before: mrep_deseq2_clustering_1
+ mlib_joint_genrich_frip_score:
+ before: mlib_joint_genrich_peak_count
+ mlib_joint_genrich_peak_annotation:
+ before: mlib_joint_genrich_frip_score
"nf-core-atacseq-methods-description":
order: -1000
software_versions:
diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r
index fc2096eb..b0e9a15a 100755
--- a/bin/plot_homer_annotatepeaks.r
+++ b/bin/plot_homer_annotatepeaks.r
@@ -90,6 +90,7 @@ for (idx in 1:length(HomerFiles)) {
dist.freq[which(unique.gene.dat$Distance.to.TSS < 10000)] <- "< 10kb"
dist.freq[which(unique.gene.dat$Distance.to.TSS < 5000)] <- "< 5kb"
dist.freq[which(unique.gene.dat$Distance.to.TSS < 2000)] <- "< 2kb"
+ dist.freq[which(unique.gene.dat$Distance.to.TSS < 1000)] <- "< 1kb"
dist.freq <- as.data.frame(table(dist.freq))
colnames(dist.freq) <- c("distance",sampleid)
dist.melt <- melt(dist.freq)
diff --git a/bin/plot_macs2_qc.r b/bin/plot_peaks_qc.r
similarity index 98%
rename from bin/plot_macs2_qc.r
rename to bin/plot_peaks_qc.r
index 5cf074de..0275a1f3 100755
--- a/bin/plot_macs2_qc.r
+++ b/bin/plot_peaks_qc.r
@@ -20,7 +20,7 @@ library(scales)
option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"),
make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"),
make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"),
- make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string"))
+ make_option(c("-p", "--outprefix"), type="character", default='_peakqc', help="Output prefix", metavar="string"))
opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)
diff --git a/conf/base.config b/conf/base.config
index f2ac8d7a..a6ad1a91 100755
--- a/conf/base.config
+++ b/conf/base.config
@@ -47,6 +47,11 @@ process {
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
+ withLabel:process_ultra {
+ cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+ memory = { check_max( 150.GB * task.attempt, 'memory' ) }
+ time = { check_max( 120.h * task.attempt, 'time' ) }
+ }
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
}
diff --git a/conf/modules.config b/conf/modules.config
index 2da820b5..b38e0535 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -261,7 +261,8 @@ if (params.aligner == 'bowtie2') {
ext.args = {
[
meta.read_group ? "--rg-id ${meta.id} --rg SM:${meta.id - ~/_T\d+$/} --rg PL:ILLUMINA --rg LB:${meta.id} --rg PU:1" : '',
- params.seq_center ? "--rg CN:${params.seq_center}" : ''
+ params.seq_center ? "--rg CN:${params.seq_center}" : '',
+ params.analyze_multimappers != 0 ? "-k ${params.analyze_multimappers}" : ''
].join(' ').trim()
}
ext.prefix = { "${meta.id}.Lb" }
@@ -317,7 +318,8 @@ if (params.aligner == 'star') {
'--readFilesCommand zcat',
'--runRNGseed 0',
'--outSAMattributes NH HI AS NM MD',
- params.save_unaligned ? '--outReadsUnmapped Fastx' : ''
+ params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
+ params.analyze_multimappers != 0 ? "--outFilterMultimapNmax ${params.analyze_multimappers} --winAnchorMultimapNmax ${params.analyze_multimappers * 2}" : ''
].join(' ').trim()
publishDir = [
[
@@ -395,7 +397,7 @@ process {
[
meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001',
params.keep_dups ? '' : '-F 0x0400',
- params.keep_multi_map ? '' : '-q 1'
+ params.keep_multi_map ? '' : '-q 1 -F 4 -F 256'
].join(' ').trim()
}
ext.prefix = { meta.single_end ? "${meta.id}.mLb.clN.sorted" : "${meta.id}.mLb.flT.sorted" }
@@ -590,7 +592,7 @@ if (!params.skip_plot_fingerprint) {
}
process {
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' {
ext.args = [
'--keep-dup all',
'--nomodel',
@@ -607,7 +609,7 @@ process {
]
}
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:FRIP_SCORE' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' {
ext.args = '-bed -c -f 0.20'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -615,7 +617,7 @@ process {
]
}
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' {
ext.prefix = { "${meta.id}.mLb.clN_peaks" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -627,7 +629,7 @@ process {
if (!params.skip_peak_annotation) {
process {
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' {
ext.args = '-gid'
ext.prefix = { "${meta.id}.mLb.clN_peaks" }
publishDir = [
@@ -640,7 +642,7 @@ if (!params.skip_peak_annotation) {
if (!params.skip_peak_qc) {
process {
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' {
ext.args = '-o ./ -p macs2_peak.mLb.clN'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -649,7 +651,7 @@ if (!params.skip_peak_annotation) {
]
}
- withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' {
+ withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' {
ext.args = '-o ./'
ext.prefix = 'macs2_annotatePeaks.mLb.clN'
publishDir = [
@@ -719,6 +721,164 @@ if (!params.skip_peak_annotation) {
}
}
+process {
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_INDEX' {
+ ext.prefix = { "${meta.id}.mLb.sorted" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: '*.{bai,csi}'
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_STATS_SAMTOOLS:.*' {
+ ext.prefix = { "${meta.id}.mLb.sorted.bam" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: '*.{stats,flagstat,idxstats}'
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_SE' {
+ ext.args = '-n'
+ ext.prefix = { "${meta.id}.mLb.namesorted" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: '*.bam',
+ enabled: params.save_align_intermeds
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:.*' {
+ ext.prefix = { "${meta.id}.mLb.sorted" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: "*.{bam,bai}"
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_PE' {
+ ext.args = '-n'
+ ext.prefix = { "${meta.id}.mLb.namesorted" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: '*.bam',
+ enabled: params.save_align_intermeds
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
+ ext.prefix = { "${meta.id}.mLb.sorted.bam" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" },
+ mode: params.publish_dir_mode,
+ pattern: "*.{stats,flagstat,idxstats}"
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' {
+ ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' }
+ ext.prefix = { "${meta.id}.mLb" }
+ publishDir = [
+ [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" },
+ mode: params.publish_dir_mode,
+ pattern: "*.bigWig"
+ ],
+ [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig/scale" },
+ mode: params.publish_dir_mode,
+ pattern: "*.txt"
+ ]
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:UCSC_BEDGRAPHTOBIGWIG' {
+ ext.prefix = { "${meta.id}.mLb" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" },
+ mode: params.publish_dir_mode,
+ pattern: "*.bigWig"
+ ]
+ }
+}
+
+if (!params.skip_genrich_sep) {
+ process {
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' {
+ ext.prefix = { "${meta.id}.mLb" }
+ ext.args = [ '-j',
+ params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '',
+ params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '',
+ params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : ''
+ ].join(' ').trim()
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' {
+ ext.args = '-bed -c -f 0.20'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ enabled: false
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' {
+ ext.prefix = { "${meta.id}.mLb.genrich.speaks" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+
+ if (!params.skip_peak_annotation) {
+ process {
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' {
+ ext.args = '-gid'
+ ext.prefix = { "${meta.id}.mLb.genrich.speaks" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+
+ if (!params.skip_peak_qc) {
+ process {
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' {
+ ext.args = '-o ./ -p genrich_speak.mLb'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' {
+ ext.args = '-o ./'
+ ext.prefix = 'genrich_annotate_sPeaks.mLb'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+ }
+ }
+}
+
if (!params.skip_ataqv) {
process {
withName: 'MERGED_LIBRARY_ATAQV_ATAQV' {
@@ -816,7 +976,7 @@ if (!params.skip_merge_replicates) {
}
process {
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' {
ext.args = [
'--keep-dup all',
'--nomodel',
@@ -831,7 +991,7 @@ if (!params.skip_merge_replicates) {
]
}
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:FRIP_SCORE' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' {
ext.args = '-bed -c -f 0.20'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -839,7 +999,7 @@ if (!params.skip_merge_replicates) {
]
}
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' {
ext.prefix = { "${meta.id}.mRp.clN_peaks" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -851,7 +1011,7 @@ if (!params.skip_merge_replicates) {
if (!params.skip_peak_annotation ) {
process {
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' {
ext.args = '-gid'
ext.prefix = { "${meta.id}.mRp.clN_peaks" }
publishDir = [
@@ -864,7 +1024,7 @@ if (!params.skip_merge_replicates) {
if (!params.skip_peak_qc) {
process {
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' {
ext.args = '-o ./ -p macs2_peak.mRp.clN'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
@@ -873,7 +1033,7 @@ if (!params.skip_merge_replicates) {
]
}
- withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' {
+ withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' {
ext.args = '-o ./'
ext.prefix = 'macs2_annotatePeaks.mRp.clN'
publishDir = [
@@ -943,6 +1103,76 @@ if (!params.skip_merge_replicates) {
}
}
+process {
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' {
+ ext.prefix = { "${meta.id}.mLb" }
+ ext.args = [ '-j',
+ params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '',
+ params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '',
+ params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : ''
+ ].join(' ').trim()
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' {
+ ext.args = '-bed -c -f 0.20'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ enabled: false
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' {
+ ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
+
+if (!params.skip_peak_annotation) {
+ process {
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' {
+ ext.args = '-gid'
+ ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" }
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+
+ if (!params.skip_peak_qc) {
+ process {
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' {
+ ext.args = '-o ./ -p genrich_jpeak.mLb'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' {
+ ext.args = '-o ./'
+ ext.prefix = 'genrich_annotate_jPeaks.mLb'
+ publishDir = [
+ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+ }
+}
+
if (!params.skip_igv) {
process {
withName: 'IGV' {
diff --git a/modules.json b/modules.json
index 962c8dc3..c2358971 100644
--- a/modules.json
+++ b/modules.json
@@ -80,6 +80,11 @@
"git_sha": "bd8092b67b5103bdd52e300f75889442275c3117",
"installed_by": ["fastq_fastqc_umitools_trimgalore"]
},
+ "genrich": {
+ "branch": "master",
+ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+ "installed_by": ["modules"]
+ },
"gffread": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf
index 74a7936f..b8740ed4 100644
--- a/modules/local/genome_blacklist_regions.nf
+++ b/modules/local/genome_blacklist_regions.nf
@@ -13,19 +13,22 @@ process GENOME_BLACKLIST_REGIONS {
val keep_mito
output:
- path '*.bed' , emit: bed
- path "versions.yml", emit: versions
+ path '*.whitelist_regions.bed', emit: whitelist_bed
+ path '*.blacklist_regions.bed', emit: blacklist_bed
+ path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
- def file_out = "${sizes.simpleName}.include_regions.bed"
+ def whitelist = "${sizes.simpleName}.whitelist_regions.bed"
+ def blacklist_out = "${sizes.simpleName}.blacklist_regions.bed"
def name_filter = mito_name ? "| awk '\$1 !~ /${params.mito_name}/ {print \$0}'": ''
def mito_filter = keep_mito ? '' : name_filter
if (blacklist) {
"""
- sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $file_out
+ sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $whitelist
+ complementBed -i $whitelist -g $sizes > $blacklist_out
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -34,7 +37,8 @@ process GENOME_BLACKLIST_REGIONS {
"""
} else {
"""
- awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $file_out
+ awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $whitelist
+ complementBed -i $whitelist -g $sizes > $blacklist_out
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/igv.nf b/modules/local/igv.nf
index d060d0e7..be2cf8f4 100644
--- a/modules/local/igv.nf
+++ b/modules/local/igv.nf
@@ -14,6 +14,11 @@ process IGV {
path ("${bigwig_replicate_publish_dir}/*")
path ("${peak_replicate_publish_dir}/*")
path ("${consensus_replicate_publish_dir}/*")
+
+ path ("${bigwig_library_nf_publish_dir}/*")
+ path ("${peak_library_s_genrich_publish_dir}/*")
+ path ("${peak_library_j_genrich_publish_dir}/*")
+
val bigwig_library_publish_dir
val peak_library_publish_dir
val consensus_library_publish_dir
@@ -21,6 +26,10 @@ process IGV {
val peak_replicate_publish_dir
val consensus_replicate_publish_dir
+ val bigwig_library_nf_publish_dir
+ val peak_library_s_genrich_publish_dir
+ val peak_library_j_genrich_publish_dir
+
output:
// Publish fasta file while copyTo fails when the source and destination buckets are in different regions
path "*files.txt" , emit: txt
@@ -40,6 +49,9 @@ process IGV {
find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_replicate_publish_dir" || test \$? = 1; } > mRp_bigwig.igv.txt
find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_replicate_publish_dir" || test \$? = 1; } > mRp_peaks.igv.txt
find * -type l -name "*.bed" -exec echo -e ""{}"\\t0,0,0" \\; | { grep "^$consensus_replicate_publish_dir" || test \$? = 1; } > mRp_bed.igv.txt
+ find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_library_nf_publish_dir" || test \$? = 1; } > mLb_nf_bigwig.igv.txt
+ find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_s_genrich_publish_dir" || test \$? = 1; } > mLb_s_genrich_peaks.igv.txt
+ find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_j_genrich_publish_dir" || test \$? = 1; } > mLb_j_genrich_peaks.igv.txt
cat *.txt > igv_files.txt
igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../'
diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf
index 6064fd7d..676332dd 100644
--- a/modules/local/multiqc.nf
+++ b/modules/local/multiqc.nf
@@ -40,6 +40,10 @@ process MULTIQC {
path ('macs2/merged_library/annotation/*')
path ('macs2/merged_library/featurecounts/*')
+ path ('genrich/merged_library/sep/peaks/*')
+ path ('genrich/merged_library/sep/peaks/*')
+ path ('genrich/merged_library/sep/annotation/*')
+
path ('alignment/merged_replicate/*')
path ('alignment/merged_replicate/*')
path ('alignment/merged_replicate/*')
@@ -55,6 +59,10 @@ process MULTIQC {
path ('deseq2_replicate/*')
path ('deseq2_replicate/*')
+ path ('genrich/merged_library/joint/peaks/*')
+ path ('genrich/merged_library/joint/peaks/*')
+ path ('genrich/merged_library/joint/annotation/*')
+
output:
path "*multiqc_report.html", emit: report
path "*_data" , emit: data
diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_peaks_qc.nf
similarity index 96%
rename from modules/local/plot_macs2_qc.nf
rename to modules/local/plot_peaks_qc.nf
index a2c39b02..d0e141d2 100644
--- a/modules/local/plot_macs2_qc.nf
+++ b/modules/local/plot_peaks_qc.nf
@@ -1,4 +1,4 @@
-process PLOT_MACS2_QC {
+process PLOT_PEAKS_QC {
label 'process_medium'
conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2"
@@ -22,7 +22,7 @@ process PLOT_MACS2_QC {
def args = task.ext.args ?: ''
def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak'
"""
- plot_macs2_qc.r \\
+ plot_peaks_qc.r \\
-i ${peaks.join(',')} \\
-s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\
$args
diff --git a/modules/nf-core/genrich/main.nf b/modules/nf-core/genrich/main.nf
new file mode 100644
index 00000000..69a6bd8c
--- /dev/null
+++ b/modules/nf-core/genrich/main.nf
@@ -0,0 +1,69 @@
+process GENRICH {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "bioconda::genrich=0.6.1"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genrich:0.6.1--h5bf99c6_1' :
+ 'biocontainers/genrich:0.6.1--h5bf99c6_1' }"
+
+ input:
+ tuple val(meta), path(treatment_bam), path(control_bam)
+ path blacklist_bed
+ val save_pvalues
+ val save_pileup
+ val save_bed
+ val save_duplicates
+
+ output:
+ tuple val(meta), path("*.narrowPeak") , emit: peak
+ tuple val(meta), path("*pvalues.bedGraph"), optional:true, emit: bedgraph_pvalues
+ tuple val(meta), path("*pileup.bedGraph"), optional:true, emit: bedgraph_pileup
+ tuple val(meta), path("*intervals.bed"), optional:true, emit: bed_intervals
+ tuple val(meta), path("*duplicates.txt"), optional:true, emit: duplicates
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ""
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def layout = (!args.contains("-y") && meta.single_end) ? "-y" : ""
+ def treatment = treatment_bam ? "-t ${treatment_bam.sort().join(',')}" : ""
+ def control = control_bam ? "-c ${control_bam.sort().join(',')}" : ""
+ def blacklist = blacklist_bed ? "-E $blacklist_bed" : ""
+ def pvalues = save_pvalues ? "-f ${prefix}.pvalues.bedGraph" : ""
+ def pileup = save_pileup ? "-k ${prefix}.pileup.bedGraph" : ""
+ def bed = save_bed ? "-b ${prefix}.intervals.bed" : ""
+ def duplicates = ""
+
+ if (save_duplicates) {
+ if (args.contains('-r')) {
+ duplicates = "-R ${prefix}.duplicates.txt"
+ } else {
+ log.info '[Genrich] Duplicates can only be saved if they are filtered, defaulting to -r option (Remove PCR duplicates).'
+ duplicates = "-r -R ${prefix}.duplicates.txt"
+ }
+ }
+
+ """
+ Genrich \\
+ $treatment \\
+ $args \\
+ $layout \\
+ $blacklist \\
+ -o ${prefix}.narrowPeak \\
+ $pvalues \\
+ $pileup \\
+ $bed \\
+ $duplicates \\
+ $control
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genrich: \$(echo \$(Genrich --version 2>&1) | sed 's/^Genrich, version //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/nf-core/genrich/meta.yml b/modules/nf-core/genrich/meta.yml
new file mode 100644
index 00000000..414821f6
--- /dev/null
+++ b/modules/nf-core/genrich/meta.yml
@@ -0,0 +1,80 @@
+name: genrich
+description: Peak-calling for ChIP-seq and ATAC-seq enrichment experiments
+keywords:
+ - peak-calling
+ - ChIP-seq
+ - ATAC-seq
+tools:
+ - genrich:
+ description: |
+ Genrich is a peak-caller for genomic enrichment assays (e.g. ChIP-seq, ATAC-seq).
+ It analyzes alignment files generated following the assay and produces a file
+ detailing peaks of significant enrichment.
+ homepage: https://github.com/jsh58/Genrich
+ documentation: https://github.com/jsh58/Genrich#readme
+ tool_dev_url: https://github.com/jsh58/Genrich
+
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - treatment_bam:
+ type: file
+ description: Coordinate sorted BAM/SAM file from treatment sample or list of BAM/SAM files from biological replicates
+ pattern: "*.{bam,sam}"
+ - control_bam:
+ type: file
+ description: Coordinate sorted BAM/SAM file from control sample or list of BAM/SAM files from control samples
+ pattern: "*.{bam,sam}"
+ - blacklist_bed:
+ type: file
+ description: Bed file containing genomic intervals to exclude from the analysis
+ pattern: "*.{bed}"
+ - save_pvalues:
+ type: boolean
+ description: Create bedgraph-ish file for p/q-values file
+ - save_pileup:
+ type: boolean
+ description: Create bedgraph-ish file for pileups and p-values
+ - save_bed:
+ type: boolean
+ description: Create BED file for reads/fragments/intervals
+ - save_duplicates:
+ type: boolean
+ description: Create PCR duplicates file (only works if -r option is set)
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - peaks:
+ type: file
+ description: Output file is in ENCODE narrowPeak format
+ pattern: "*.{narrowPeak}"
+ - bedgraph_pvalues:
+ type: file
+ description: bedGraph file containing p/q values
+ pattern: "*.{pvalues.bedGraph}"
+ - bedgraph_pileup:
+ type: file
+ description: bedGraph file containing pileups and p-values
+ pattern: "*.{pileup.bedGraph}"
+ - bed_intervals:
+ type: file
+ description: Bed file containing annotated intervals
+ pattern: "*.{intervals.bed}"
+ - duplicates:
+ type: file
+ description: Text output file containing intervals corresponding to PCR duplicates
+ pattern: "*.{intervals.txt}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@JoseEspinosa"
+ - "@samuelruizperez"
diff --git a/nextflow.config b/nextflow.config
index 9f60b156..62a5c420 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,18 +43,31 @@ params {
skip_merge_replicates = false
save_align_intermeds = false
save_unaligned = false
+ analyze_multimappers = 0
+
// Options: Peaks
narrow_peak = false
+ skip_peak_qc = false
+ skip_peak_annotation = false
+
+ // Options: MACS2
broad_cutoff = 0.1
macs_fdr = null
macs_pvalue = null
min_reps_consensus = 1
save_macs_pileup = false
- skip_peak_qc = false
- skip_peak_annotation = false
skip_consensus_peaks = false
+ // Options: Genrich
+ skip_genrich_sep = true
+ genrich_qvalue = 0.01
+ genrich_pvalue = 0.01
+ save_genrich_pileup = true
+ save_genrich_pvalues = true
+ save_genrich_bed = true
+ save_genrich_duplicates = true
+
// Options: DESeq2 QC
deseq2_vst = true
skip_deseq2_qc = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2e384c9e..9402a7d4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -325,6 +325,13 @@
"hidden": true,
"description": "BAMTools JSON file with custom filters for single-end data.",
"fa_icon": "fas fa-cog"
+ },
+ "analyze_multimappers": {
+ "type": "integer",
+ "default": 0,
+ "description": "Output this number of multimapping reads from the alignment step and include them in peak calling.",
+ "help_text": "This will include non-unique reads in the alignment (Bowtie2) and peak calling steps (Genrich). Default of 0 means only the best alignment is used and multimappers are ignored.",
+ "fa_icon": "fas fa-chart-bar"
}
}
},
@@ -382,6 +389,43 @@
"type": "boolean",
"description": "Skip consensus peak generation, annotation and counting.",
"fa_icon": "fas fa-fast-forward"
+ },
+ "skip_genrich_sep": {
+ "type": "boolean",
+ "description": "Skip Genrich individual (separate replicates) peak calling. If skipped, each group of biological replicates will be analysed together, which is the recommended approach.",
+ "fa_icon": "fas fa-fast-forward"
+ },
+ "save_genrich_pileup": {
+ "type": "boolean",
+ "description": "Instruct Genrich to create bedGraph files normalised to signal per million reads.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_genrich_pvalues": {
+ "type": "boolean",
+ "description": "Instruct Genrich to create bedGraph files with p-values.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_genrich_bed": {
+ "type": "boolean",
+ "description": "Instruct Genrich to create BED files.",
+ "fa_icon": "fas fa-save"
+ },
+ "save_genrich_duplicates": {
+ "type": "boolean",
+ "description": "Instruct Genrich to save duplicates.",
+ "fa_icon": "fas fa-save"
+ },
+ "genrich_qvalue": {
+ "type": "number",
+ "default": 0.1,
+ "description": "Minimum q-value cutoff for peak detection.",
+ "fa_icon": "fas fa-sort-amount-down"
+ },
+ "genrich_pvalue": {
+ "type": "number",
+ "default": 0.1,
+ "description": "Minimum p-value cutoff for peak detection.",
+ "fa_icon": "fas fa-sort-amount-down"
}
}
},
diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf
new file mode 100644
index 00000000..31bf8521
--- /dev/null
+++ b/subworkflows/local/bam_nofilter_bamtools.nf
@@ -0,0 +1,75 @@
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_SE } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_PE } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
+include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main'
+include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main'
+
+workflow BAM_NOFILTER_BAMTOOLS {
+ take:
+ ch_bam // channel: [ val(meta), [ bam ] ]
+ ch_fasta // channel: [ fasta ]
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ ch_bam
+ .branch {
+ meta, bam ->
+ single_end: meta.single_end
+ return [ meta, bam ]
+ paired_end: !meta.single_end
+ return [ meta, bam ]
+ }
+ .set { ch_bam_b }
+
+ //
+ // Index SE BAM file
+ //
+ SAMTOOLS_INDEX {
+ ch_bam_b.single_end
+ }
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+ //
+ // Name sort SE BAM
+ //
+ SAMTOOLS_SORT_SE (
+ ch_bam_b.single_end
+ )
+ ch_versions = ch_versions.mix(SAMTOOLS_SORT_SE.out.versions.first())
+
+ //
+ // Run samtools stats, flagstat and idxstats on SE BAM
+ //
+ BAM_STATS_SAMTOOLS (
+ ch_bam_b.single_end.join(SAMTOOLS_INDEX.out.bai),
+ ch_fasta
+ )
+ ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first())
+
+ //
+ // Name sort PE BAM
+ //
+ SAMTOOLS_SORT_PE (
+ ch_bam_b.paired_end
+ )
+ ch_versions = ch_versions.mix(SAMTOOLS_SORT_PE.out.versions.first())
+
+ //
+ // Sort, index PE BAM file and run samtools stats, flagstat and idxstats
+ //
+ BAM_SORT_STATS_SAMTOOLS (
+ SAMTOOLS_SORT_PE.out.bam,
+ ch_fasta
+ )
+ ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first())
+
+ emit:
+ name_bam = SAMTOOLS_SORT_PE.out.bam.mix(SAMTOOLS_SORT_SE.out.bam) // channel: [ val(meta), [ bam ] ]
+ bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ]
+ stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ]
+ flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ]
+ idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats.mix(BAM_STATS_SAMTOOLS.out.idxstats) // channel: [ val(meta), [ idxstats ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf
new file mode 100644
index 00000000..e162f628
--- /dev/null
+++ b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf
@@ -0,0 +1,183 @@
+//
+// Call peaks with Genrich, annotate with HOMER and perform downstream QC
+//
+
+include { GENRICH } from '../../modules/nf-core/genrich/main'
+include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main'
+
+include { FRIP_SCORE } from '../../modules/local/frip_score'
+include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks'
+
+include { PLOT_PEAKS_QC as PLOT_GENRICH_QC } from '../../modules/local/plot_peaks_qc'
+
+include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks'
+
+workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER {
+ take:
+ ch_bam // channel: [ val(meta), [ treatment_bam ], [ control_bam ] ]
+ ch_fasta // channel: [ fasta ]
+ ch_gtf // channel: [ gtf ]
+ genome // string: genome name
+ ch_blacklist_regions // channel: [ bed ]
+ annotate_peaks_suffix // string: suffix for input HOMER annotate peaks files to be trimmed off
+ ch_peak_count_header_multiqc // channel: [ header_file ]
+ ch_frip_score_multiqc // channel: [ header_file ]
+ ch_peak_annotation_header_multiqc // channel: [ header_file ]
+// add broad_peak options
+ is_narrow_peak // boolean: true/false
+ skip_peak_annotation // boolean: true/false
+ skip_peak_qc // boolean: true/false
+
+ save_pvalues // boolean: true/false
+ save_pileup // boolean: true/false
+ save_bed // boolean: true/false
+ save_duplicates // boolean: true/false
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Call peaks with Genrich
+ //
+ GENRICH (
+ ch_bam,
+ ch_blacklist_regions,
+ save_pvalues,
+ save_pileup,
+ save_bed,
+ save_duplicates
+
+ )
+ ch_versions = ch_versions.mix(GENRICH.out.versions.first())
+
+ //
+ // Filter out samples with 0 Genrich peaks called
+ //
+ GENRICH
+ .out
+ .peak
+ .filter {
+ meta, peaks ->
+ peaks.size() > 0
+ }
+ .set { ch_genrich_peaks }
+
+
+ // Create channels: [ meta, ip_bam, peaks ]
+
+ ch_bam
+ .join(ch_genrich_peaks, by: [0])
+ .map {
+ meta, treatment_bam, control_bam, peaks ->
+ [ meta, treatment_bam, peaks ]
+ }
+ .set { ch_bam_peaks }
+
+ // split ch_bam_peaks by treatment_bam
+
+ ch_bam_peaks
+ .transpose()
+ .map {
+ meta, treatment_bam, peaks ->
+ def meta_clone = meta.clone()
+ meta_clone.id = treatment_bam.getSimpleName()
+ [ meta_clone, treatment_bam, peaks ]
+ }
+ .set { ch_bam_peaks_treatment_bam }
+
+ //
+ // Calculate FRiP score
+ //
+ FRIP_SCORE (
+ ch_bam_peaks_treatment_bam
+ )
+ ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first())
+
+ // Create channels: [ meta, peaks, frip ]
+ ch_bam_peaks_treatment_bam
+ .join(FRIP_SCORE.out.txt, by: [0])
+ .map {
+ meta, ip_bam, peaks, frip ->
+ [ meta, peaks, frip ]
+ }
+ .set { ch_bam_peak_frip }
+
+ //
+ // FRiP score custom content for MultiQC
+ //
+ MULTIQC_CUSTOM_PEAKS (
+ ch_bam_peak_frip,
+ ch_peak_count_header_multiqc,
+ ch_frip_score_multiqc
+ )
+ ch_versions = ch_versions.mix(MULTIQC_CUSTOM_PEAKS.out.versions.first())
+
+ ch_homer_annotatepeaks = Channel.empty()
+ ch_homer_det_annotatepeaks = Channel.empty()
+ ch_plot_genrich_qc_txt = Channel.empty()
+ ch_plot_genrich_qc_pdf = Channel.empty()
+ ch_plot_homer_annotatepeaks_txt = Channel.empty()
+ ch_plot_homer_annotatepeaks_pdf = Channel.empty()
+ ch_plot_homer_annotatepeaks_tsv = Channel.empty()
+ if (!skip_peak_annotation) {
+ //
+ // Annotate peaks with HOMER
+ //
+ HOMER_ANNOTATEPEAKS (
+ ch_genrich_peaks,
+ ch_fasta,
+ ch_gtf
+ )
+ ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt
+ ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first())
+
+ if (!skip_peak_qc) {
+ //
+ // Genrich QC plots with R
+ //
+ PLOT_GENRICH_QC (
+ ch_genrich_peaks.collect{it[1]},
+ is_narrow_peak
+ )
+ ch_plot_genrich_qc_txt = PLOT_GENRICH_QC.out.txt
+ ch_plot_genrich_qc_pdf = PLOT_GENRICH_QC.out.pdf
+ ch_versions = ch_versions.mix(PLOT_GENRICH_QC.out.versions)
+
+ //
+ // Peak annotation QC plots with R
+ //
+ PLOT_HOMER_ANNOTATEPEAKS (
+ HOMER_ANNOTATEPEAKS.out.txt.collect{it[1]},
+ ch_peak_annotation_header_multiqc,
+ annotate_peaks_suffix
+ )
+ ch_plot_homer_annotatepeaks_txt = PLOT_HOMER_ANNOTATEPEAKS.out.txt
+ ch_plot_homer_annotatepeaks_pdf = PLOT_HOMER_ANNOTATEPEAKS.out.pdf
+ ch_plot_homer_annotatepeaks_tsv = PLOT_HOMER_ANNOTATEPEAKS.out.tsv
+ ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions)
+ }
+ }
+
+ emit:
+ peaks = ch_genrich_peaks // channel: [ val(meta), [ peaks ] ]
+ bed_intervals = GENRICH.out.bed_intervals // channel: [ val(meta), [ bed ] ]
+ bedgraph_pileup = GENRICH.out.bedgraph_pileup // channel: [ val(meta), [ bedgraph ] ]
+ bedgraph_pvalues = GENRICH.out.bedgraph_pvalues // channel: [ val(meta), [ bedgraph ] ]
+ duplicates = GENRICH.out.duplicates // channel: [ val(meta), [ bedgraph ] ]
+
+ frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ]
+
+ frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ]
+ peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ]
+
+ homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ]
+ plot_genrich_qc_txt = ch_plot_genrich_qc_txt // channel: [ txt ]
+ plot_genrich_qc_pdf = ch_plot_genrich_qc_pdf // channel: [ pdf ]
+
+ plot_homer_annotatepeaks_txt = ch_plot_homer_annotatepeaks_txt // channel: [ txt ]
+ plot_homer_annotatepeaks_pdf = ch_plot_homer_annotatepeaks_pdf // channel: [ pdf ]
+ plot_homer_annotatepeaks_tsv = ch_plot_homer_annotatepeaks_tsv // channel: [ tsv ]
+
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf
index 4c2a8710..42d3195e 100644
--- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf
+++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf
@@ -7,7 +7,7 @@ include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatep
include { FRIP_SCORE } from '../../modules/local/frip_score'
include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks'
-include { PLOT_MACS2_QC } from '../../modules/local/plot_macs2_qc'
+include { PLOT_PEAKS_QC as PLOT_MACS2_QC } from '../../modules/local/plot_peaks_qc'
include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks'
workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER {
@@ -23,7 +23,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER {
is_narrow_peak // boolean: true/false
skip_peak_annotation // boolean: true/false
skip_peak_qc // boolean: true/false
-
+
main:
ch_versions = Channel.empty()
@@ -43,7 +43,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER {
MACS2_CALLPEAK
.out
.peak
- .filter {
+ .filter {
meta, peaks ->
peaks.size() > 0
}
@@ -102,7 +102,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER {
)
ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt
ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first())
-
+
if (!skip_peak_qc) {
//
// MACS2 QC plots with R
@@ -138,7 +138,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER {
bedgraph = MACS2_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ]
frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ]
-
+
frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ]
peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ]
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index d6dbffcf..cd54d112 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -143,13 +143,15 @@ workflow PREPARE_GENOME {
// Prepare genome intervals for filtering by removing regions in blacklist file
//
ch_genome_filtered_bed = Channel.empty()
+ ch_genome_blacklist_bed = Channel.empty()
GENOME_BLACKLIST_REGIONS (
ch_chrom_sizes,
ch_blacklist.ifEmpty([]),
params.mito_name ?: '',
params.keep_mito
)
- ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed
+ ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.whitelist_bed
+ ch_genome_blacklist_bed = GENOME_BLACKLIST_REGIONS.out.blacklist_bed
ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions)
//
@@ -244,7 +246,8 @@ workflow PREPARE_GENOME {
gene_bed = ch_gene_bed // path: gene.bed
tss_bed = ch_tss_bed // path: tss.bed
chrom_sizes = ch_chrom_sizes // path: genome.sizes
- filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
+ filtered_bed = ch_genome_filtered_bed // path: *.whitelist_regions.bed
+ blacklist_bed = ch_genome_blacklist_bed // path: *.blacklist_regions.bed
bwa_index = ch_bwa_index // path: bwa/index/
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
chromap_index = ch_chromap_index // path: genome.index
diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf
index 5daf5839..9796a5e0 100644
--- a/workflows/atacseq.nf
+++ b/workflows/atacseq.nf
@@ -43,8 +43,19 @@ ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config)
// Header files for MultiQC
ch_multiqc_merged_library_peak_count_header = file("$projectDir/assets/multiqc/merged_library_peak_count_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_sep_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_joint_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt", checkIfExists: true)
+
+
ch_multiqc_merged_library_frip_score_header = file("$projectDir/assets/multiqc/merged_library_frip_score_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_sep_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_joint_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt", checkIfExists: true)
+
ch_multiqc_merged_library_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_peak_annotation_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_sep_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt", checkIfExists: true)
+ch_multiqc_merged_library_joint_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt", checkIfExists: true)
+
+
ch_multiqc_merged_library_deseq2_pca_header = file("$projectDir/assets/multiqc/merged_library_deseq2_pca_header.txt", checkIfExists: true)
ch_multiqc_merged_library_deseq2_clustering_header = file("$projectDir/assets/multiqc/merged_library_deseq2_clustering_header.txt", checkIfExists: true)
@@ -71,14 +82,21 @@ include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
include { ALIGN_STAR } from '../subworkflows/local/align_star'
include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools'
include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools'
+include { BAM_NOFILTER_BAMTOOLS as MERGED_LIBRARY_NOFILTER_BAM } from '../subworkflows/local/bam_nofilter_bamtools'
include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
+include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_NF_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_REPLICATE_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
-include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
-include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
+include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
+include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_LIBRARY_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf'
include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_REPLICATE_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf'
+include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf'
+include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf'
+
+
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT NF-CORE MODULES/SUBWORKFLOWS
@@ -157,10 +175,20 @@ workflow ATACSEQ {
)
ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
+ //
+ // Check if analyze_multimappers is set
+ //
+ if (params.analyze_multimappers != 0) {
+ if (params.aligner == 'bwa' || params.aligner == 'chromap') {
+ exit 1, 'Multimapping read analysis is so far only supported for Bowtie2 and STAR. Remove the --analyze_multimappers parameter or change --aligner to bowtie2 or star.'
+ }
+ }
+
//
// SUBWORKFLOW: Alignment with BWA & BAM QC
//
ch_genome_bam = Channel.empty()
+ ch_genome_bam_index = Channel.empty()
ch_samtools_stats = Channel.empty()
ch_samtools_flagstat = Channel.empty()
ch_samtools_idxstats = Channel.empty()
@@ -175,6 +203,7 @@ workflow ATACSEQ {
}
)
ch_genome_bam = FASTQ_ALIGN_BWA.out.bam
+ ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai
ch_samtools_stats = FASTQ_ALIGN_BWA.out.stats
ch_samtools_flagstat = FASTQ_ALIGN_BWA.out.flagstat
ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats
@@ -196,6 +225,7 @@ workflow ATACSEQ {
}
)
ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam
+ ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai
ch_samtools_stats = FASTQ_ALIGN_BOWTIE2.out.stats
ch_samtools_flagstat = FASTQ_ALIGN_BOWTIE2.out.flagstat
ch_samtools_idxstats = FASTQ_ALIGN_BOWTIE2.out.idxstats
@@ -219,6 +249,7 @@ workflow ATACSEQ {
[]
)
ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam
+ ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai
ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats
ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat
ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats
@@ -240,6 +271,7 @@ workflow ATACSEQ {
params.seq_center ?: ''
)
ch_genome_bam = ALIGN_STAR.out.bam
+ ch_genome_bam_index = ALIGN_STAR.out.bai
ch_samtools_stats = ALIGN_STAR.out.stats
ch_samtools_flagstat = ALIGN_STAR.out.flagstat
ch_samtools_idxstats = ALIGN_STAR.out.idxstats
@@ -293,17 +325,7 @@ workflow ATACSEQ {
// SUBWORKFLOW: Filter BAM file
//
MERGED_LIBRARY_FILTER_BAM (
- MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam
- .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true)
- .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true)
- .map {
- meta, bam, bai, csi ->
- if (bai) {
- [ meta, bam, bai ]
- } else {
- [ meta, bam, csi ]
- }
- },
+ MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam.join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]),
PREPARE_GENOME.out.filtered_bed.first(),
PREPARE_GENOME
.out
@@ -384,16 +406,7 @@ workflow ATACSEQ {
MERGED_LIBRARY_FILTER_BAM
.out
.bam
- .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0], remainder: true)
- .join(MERGED_LIBRARY_FILTER_BAM.out.csi, by: [0], remainder: true)
- .map {
- meta, bam, bai, csi ->
- if (bai) {
- [ meta, bam, bai ]
- } else {
- [ meta, bam, csi ]
- }
- }
+ .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0])
.set { ch_bam_bai }
if (params.with_control) {
@@ -446,7 +459,7 @@ workflow ATACSEQ {
//
// SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC
//
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS (
+ MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 (
ch_bam_library,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.gtf,
@@ -459,7 +472,12 @@ workflow ATACSEQ {
params.skip_peak_annotation,
params.skip_peak_qc
)
- ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.versions)
+ ch_library_peaks = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peaks
+ ch_library_frip_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc
+ ch_library_peak_count_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc
+ ch_library_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv
+
+ ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.versions)
//
// SUBWORKFLOW: Consensus peaks analysis
@@ -470,7 +488,7 @@ workflow ATACSEQ {
ch_deseq2_clustering_library_multiqc = Channel.empty()
if (!params.skip_consensus_peaks) {
MERGED_LIBRARY_CONSENSUS_PEAKS (
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks,
+ ch_library_peaks,
ch_bam_library,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.gtf,
@@ -487,21 +505,107 @@ workflow ATACSEQ {
ch_versions = ch_versions.mix(MERGED_LIBRARY_CONSENSUS_PEAKS.out.versions)
}
+ //
+ // SUBWORKFLOW: Sort by name and generate stats for unfiltered BAMs
+ //
+ MERGED_LIBRARY_NOFILTER_BAM (
+ PICARD_MERGESAMFILES_LIBRARY.out.bam,
+ PREPARE_GENOME
+ .out
+ .fasta
+ .map {
+ [ [:], it ]
+ }
+ )
+ ch_bam = MERGED_LIBRARY_NOFILTER_BAM.out.bam
+ ch_name_bam = MERGED_LIBRARY_NOFILTER_BAM.out.name_bam
+ ch_versions = ch_versions.mix(MERGED_LIBRARY_NOFILTER_BAM.out.versions.first())
+
+ //
+ // SUBWORKFLOW: Normalised bigWig coverage tracks
+ //
+ MERGED_LIBRARY_NF_BAM_TO_BIGWIG (
+ ch_bam.join(MERGED_LIBRARY_NOFILTER_BAM.out.flagstat, by: [0]),
+ PREPARE_GENOME.out.chrom_sizes
+ )
+ ch_versions = ch_versions.mix(MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.versions)
+
+
+ // Create channels: [ meta, [bam] ] or [ meta, [ bam, control_bam ] ] (now for Genrich)
+ if (params.with_control) {
+ ch_name_bam
+ .map {
+ meta, bam ->
+ meta.control ? null : [ meta.id, [ bam ] ]
+ }
+ .set { ch_control_bam }
+
+ ch_name_bam
+ .map {
+ meta, bam ->
+ meta.control ? [ meta.control, meta, [ bam ] ] : null
+ }
+ .combine(ch_control_bam, by: 0)
+ .map { it -> [ it[1] , it[2] + it[4] ] }
+ .set { ch_name_bam }
+ }
+
+ // Create channel: [ val(meta), bam, control_bam ] (now for Genrich)
+ if (params.with_control) {
+ ch_name_bam
+ .map {
+ meta, bams ->
+ [ meta , bams[0], bams[1] ]
+ }
+ .set { ch_merged_library_bams_sep }
+ } else {
+ ch_name_bam
+ .map {
+ meta, bam ->
+ [ meta , bam, [] ]
+ }
+ .set { ch_merged_library_bams_sep }
+ }
+
+ //
+ // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC
+ //
+ ch_library_genrich_sep_peaks = Channel.empty()
+ ch_library_genrich_sep_frip_multiqc = Channel.empty()
+ ch_library_genrich_sep_peak_count_multiqc = Channel.empty()
+ ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = Channel.empty()
+ if (!params.skip_genrich_sep) {
+ MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH (
+ ch_merged_library_bams_sep,
+ PREPARE_GENOME.out.fasta,
+ PREPARE_GENOME.out.gtf,
+ params.genome,
+ PREPARE_GENOME.out.blacklist_bed.first(),
+ ".mLb.genrich.speaks.annotatePeaks.txt",
+ ch_multiqc_merged_library_sep_genrich_peak_count_header,
+ ch_multiqc_merged_library_sep_genrich_frip_score_header,
+ ch_multiqc_merged_library_sep_genrich_peak_annotation_header,
+ params.narrow_peak,
+ params.skip_peak_annotation,
+ params.skip_peak_qc,
+ params.save_genrich_pvalues,
+ params.save_genrich_pileup,
+ params.save_genrich_bed,
+ params.save_genrich_duplicates
+ )
+ ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks
+ ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc
+ ch_library_genrich_sep_peak_count_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc
+ ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv
+ ch_versions = ch_versions.mix(MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.versions)
+ }
+
// Create channels: [ meta, bam, bai, peak_file ]
MERGED_LIBRARY_MARKDUPLICATES_PICARD
.out
.bam
- .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true)
- .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true)
- .map {
- meta, bam, bai, csi ->
- if (bai) {
- [ meta, bam, bai ]
- } else {
- [ meta, bam, csi ]
- }
- }
- .join(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks, by: [0])
+ .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0])
+ .join(ch_library_peaks, by: [0])
.set { ch_bam_peaks }
//
@@ -635,7 +739,8 @@ workflow ATACSEQ {
//
// SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC
//
- MERGED_REPLICATE_CALL_ANNOTATE_PEAKS (
+
+ MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 (
ch_bam_replicate,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.gtf,
@@ -648,18 +753,18 @@ workflow ATACSEQ {
params.skip_peak_annotation,
params.skip_peak_qc
)
- ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks
- ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.frip_multiqc
- ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc
- ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv
- ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.versions)
+ ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks
+ ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc
+ ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc
+ ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv
+ ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.versions)
//
// SUBWORKFLOW: Consensus peaks analysis
//
if (!params.skip_consensus_peaks) {
MERGED_REPLICATE_CONSENSUS_PEAKS (
- MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks,
+ MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks,
ch_merged_library_replicate_bam,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.gtf,
@@ -677,6 +782,50 @@ workflow ATACSEQ {
}
}
+ // Check if we have multiple replicates (now for Genrich)
+ ch_merged_library_bams_sep
+ .map {
+ meta, bam, control_bam ->
+ def meta_clone = meta.clone()
+ meta_clone.id = meta_clone.id - ~/_REP\d+$/
+ meta_clone.control = meta_clone.control ? meta_clone.control - ~/_REP\d+$/ : ""
+ [ meta_clone.id, meta_clone, bam, control_bam ]
+ }
+ .groupTuple()
+ .map {
+ id, metas, bams, control_bams ->
+ [ metas[0], bams.flatten(), control_bams.flatten() ]
+ }
+ .set { ch_merged_library_bams_joint }
+
+ //
+ // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC
+ //
+ MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH (
+ ch_merged_library_bams_joint,
+ PREPARE_GENOME.out.fasta,
+ PREPARE_GENOME.out.gtf,
+ params.genome,
+ PREPARE_GENOME.out.blacklist_bed.first(),
+ ".mLb.genrich.jpeaks.annotatePeaks.txt",
+ ch_multiqc_merged_library_joint_genrich_peak_count_header,
+ ch_multiqc_merged_library_joint_genrich_frip_score_header,
+ ch_multiqc_merged_library_joint_genrich_peak_annotation_header,
+ params.narrow_peak,
+ params.skip_peak_annotation,
+ params.skip_peak_qc,
+ params.save_genrich_pvalues,
+ params.save_genrich_pileup,
+ params.save_genrich_bed,
+ params.save_genrich_duplicates
+ )
+ ch_library_genrich_joint_peaks = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks
+ ch_library_genrich_joint_frip_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc
+ ch_library_genrich_joint_peak_count_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc
+ ch_library_genrich_joint_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv
+
+ ch_versions = ch_versions.mix(MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.versions)
+
//
// MODULE: Create IGV session
//
@@ -685,11 +834,16 @@ workflow ATACSEQ {
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.fai,
MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]),
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]),
+ ch_library_peaks.collect{it[1]}.ifEmpty([]),
ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]),
ch_ucsc_bedgraphtobigwig_replicate_bigwig.collect{it[1]}.ifEmpty([]),
ch_macs2_replicate_peaks.collect{it[1]}.ifEmpty([]),
ch_macs2_consensus_replicate_bed.collect{it[1]}.ifEmpty([]),
+
+ MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_sep_peaks.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_joint_peaks.collect{it[1]}.ifEmpty([]),
+
"${params.aligner}/merged_library/bigwig",
{ ["${params.aligner}/merged_library/macs2",
params.narrow_peak? '/narrow_peak' : '/broad_peak'
@@ -706,6 +860,15 @@ workflow ATACSEQ {
params.narrow_peak? '/narrow_peak' : '/broad_peak',
"/consensus"
].join('') },
+
+ "${params.aligner}/merged_library/genrich/bigwig",
+ { ["${params.aligner}/merged_library/genrich/sep",
+ params.narrow_peak? '/narrow_peak' : '/broad_peak'
+ ].join('') },
+ { ["${params.aligner}/merged_library/genrich/joint",
+ params.narrow_peak? '/narrow_peak' : '/broad_peak'
+ ].join('') },
+
)
ch_versions = ch_versions.mix(IGV.out.versions)
}
@@ -756,11 +919,15 @@ workflow ATACSEQ {
ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]),
ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]),
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.frip_multiqc.collect{it[1]}.ifEmpty([]),
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]),
- MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]),
+ ch_library_frip_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_peak_count_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]),
ch_featurecounts_library_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_sep_frip_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_sep_peak_count_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_sep_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]),
+
ch_markduplicates_replicate_stats.collect{it[1]}.ifEmpty([]),
ch_markduplicates_replicate_flagstat.collect{it[1]}.ifEmpty([]),
ch_markduplicates_replicate_idxstats.collect{it[1]}.ifEmpty([]),
@@ -774,7 +941,11 @@ workflow ATACSEQ {
ch_deseq2_pca_library_multiqc.collect().ifEmpty([]),
ch_deseq2_clustering_library_multiqc.collect().ifEmpty([]),
ch_deseq2_pca_replicate_multiqc.collect().ifEmpty([]),
- ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([])
+ ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([]),
+
+ ch_library_genrich_joint_frip_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_joint_peak_count_multiqc.collect{it[1]}.ifEmpty([]),
+ ch_library_genrich_joint_plot_homer_annotatepeaks_tsv.collect().ifEmpty([])
)
multiqc_report = MULTIQC.out.report.toList()
}