diff --git a/assets/merged_library_deseq2_clustering_header.txt b/assets/merged_library_deseq2_clustering_header.txt
new file mode 100644
index 00000000..cd997676
--- /dev/null
+++ b/assets/merged_library_deseq2_clustering_header.txt
@@ -0,0 +1,12 @@
+#id: 'mlib_deseq2_clustering'
+#section_name: 'MERGED LIB: DESeq2 sample similarity'
+#description: "Matrix is generated from clustering with Euclidean distances between
+# DESeq2
+# variance-stabilised (vst) values for each sample
+# in the deseq2_qc.R script."
+#plot_type: 'heatmap'
+#anchor: 'mlib_deseq2_clustering'
+#pconfig:
+# title: 'DESeq2: Heatmap of the sample-to-sample distances'
+# xlab: True
+# reverseColors: True
diff --git a/assets/merged_library_deseq2_pca_header.txt b/assets/merged_library_deseq2_pca_header.txt
new file mode 100644
index 00000000..71a9783f
--- /dev/null
+++ b/assets/merged_library_deseq2_pca_header.txt
@@ -0,0 +1,11 @@
+#id: 'mlib_deseq2_pca'
+#section_name: 'MERGED LIB: DESeq2 PCA plot'
+#description: "PCA plot of the samples in the experiment.
+# These values are calculated using DESeq2
+# in the deseq2_qc.R script."
+#plot_type: 'scatter'
+#anchor: 'mlib_deseq2_pca'
+#pconfig:
+# title: 'DESeq2: Principal component plot'
+# xlab: PC1
+# ylab: PC2
diff --git a/assets/merged_replicate_deseq2_clustering_header.txt b/assets/merged_replicate_deseq2_clustering_header.txt
new file mode 100644
index 00000000..aed0c093
--- /dev/null
+++ b/assets/merged_replicate_deseq2_clustering_header.txt
@@ -0,0 +1,12 @@
+#id: 'mrep_deseq2_clustering'
+#section_name: 'MERGED REP: DESeq2 sample similarity'
+#description: "Matrix is generated from clustering with Euclidean distances between
+# DESeq2
+# variance-stabilised (vst) values for each sample
+# in the deseq2_qc.R script."
+#plot_type: 'heatmap'
+#anchor: 'mrep_deseq2_clustering'
+#pconfig:
+# title: 'DESeq2: Heatmap of the sample-to-sample distances'
+# xlab: True
+# reverseColors: True
diff --git a/assets/merged_replicate_deseq2_pca_header.txt b/assets/merged_replicate_deseq2_pca_header.txt
new file mode 100644
index 00000000..a92b75d9
--- /dev/null
+++ b/assets/merged_replicate_deseq2_pca_header.txt
@@ -0,0 +1,11 @@
+#id: 'mrep_deseq2_pca'
+#section_name: 'MERGED REP: DESeq2 PCA plot'
+#description: "PCA plot of the samples in the experiment.
+# These values are calculated using DESeq2
+# in the deseq2_qc.R script."
+#plot_type: 'scatter'
+#anchor: 'mrep_deseq2_pca'
+#pconfig:
+# title: 'DESeq2: Principal component plot'
+# xlab: PC1
+# ylab: PC2
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 062ec1bb..238f8b9d 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -47,16 +47,193 @@ module_order:
- "*.final.out"
- custom_content
+# Custom content configuration for CLIP-specific QC metrics
+custom_data:
+ mapping:
+ parent_id: mapping
+ parent_name: "Mapping"
+ file_format: "tsv"
+ section_name: "Mapping"
+ description: "The mapping metrics for each experiment"
+ plot_type: "bargraph"
+ dedup_reads:
+ parent_id: dedup
+ parent_name: "Deduplication"
+ file_format: "tsv"
+ section_name: "Reads"
+ description: "The number of reads before and after PCR deduplication for each experiment"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Count"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ dedup_ratio:
+ parent_id: dedup
+ parent_name: "Deduplication"
+ file_format: "tsv"
+ section_name: "Ratio"
+ description: "The PCR deduplication ratio for each experiment"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Ratio"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ dedup_mean_umis:
+ parent_id: dedup
+ parent_name: "Deduplication"
+ file_format: "tsv"
+ section_name: "Mean UMIs"
+ description: "Mean number of unique UMIs per position for each experiment"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Mean number"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ crosslinks_counts:
+ parent_id: crosslinks
+ parent_name: "Crosslinks"
+ file_format: "tsv"
+ section_name: "Counts"
+ description: "The number of crosslinks or crosslink sites for each experiment"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Count"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ crosslinks_ratio:
+ parent_id: crosslinks
+ parent_name: "Crosslinks"
+ file_format: "tsv"
+ section_name: "Ratios"
+ description: "The ratio of number of cDNA mapping to crosslink positions for each experiment"
+ #plot_type: 'bargraph'
+ pconfig:
+ ylab: "Count"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ tt_decimals: 2
+ peaks_counts:
+ parent_id: peaks
+ parent_name: "Peaks"
+ file_format: "tsv"
+ section_name: "Counts"
+ description: "The total number of peaks called by each peak caller"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Number of peaks"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ xlinks_in_peaks:
+ parent_id: peaks
+ parent_name: "Peaks"
+ file_format: "tsv"
+ section_name: "Crosslinks positions in peaks"
+ description: "The total percentage of crosslinks within peaks for each peak caller"
+ #plot_type: 'bargraph'
+ pconfig:
+ ylab: "Percentage of crosslinks"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ tt_decimals: 2
+ tt_suffix: "%"
+ xlinksites_in_peaks:
+ parent_id: peaks
+ parent_name: "Peaks"
+ file_format: "tsv"
+ section_name: "Crosslinks positions in peaks"
+ description: "The total percentage of crosslink sites within peaks for each peak caller"
+ #plot_type: 'bargraph'
+ pconfig:
+ ylab: "Percentage of crosslink sites"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ tt_decimals: 2
+ tt_suffix: "%"
+ peaks_xlinksite_coverage:
+ parent_id: peaks
+ parent_name: "Peaks"
+ file_format: "tsv"
+ section_name: "Peak-crosslink coverage"
+ description: "The total percentage of nucleotides within peaks covered by a crosslink site"
+ plot_type: "bargraph"
+ pconfig:
+ ylab: "Percentage of nucleotides within peaks"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ tt_decimals: 2
+ tt_suffix: "%"
+ summary_type:
+ parent_id: Summary
+ parent_name: "Summary"
+ file_format: "tsv"
+ section_name: "Percentage of cDNA premap"
+ description: "The total percentage of cDNA summary mapped"
+ #plot_type: 'bargraph'
+ pconfig:
+ ylab: "Type"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+ summary_subtype:
+ parent_id: Summary
+ parent_name: "Summary"
+ file_format: "tsv"
+ section_name: "Percentage of cDNA premap subtypes"
+ description: "The total percentage of cDNA subtypes mapped"
+ #plot_type: 'bargraph'
+ pconfig:
+ ylab: "Type"
+ #stacking: False
+ cpswitch: False
+ tt_percentages: False
+
+sp:
+ mapping:
+ fn: "mapping.tsv"
+ dedup_reads:
+ fn: "dedup_reads.tsv"
+ dedup_ratio:
+ fn: "dedup_ratio.tsv"
+ dedup_mean_umis:
+ fn: "dedup_mean_umis.tsv"
+ crosslinks_counts:
+ fn: "xlinks_counts.tsv"
+ crosslinks_ratio:
+ fn: "xlinks_ratio.tsv"
+  peaks_counts:
+ fn: "total_peaks.tsv"
+ xlinks_in_peaks:
+ fn: "xlinks_in_peaks.tsv"
+ xlinksites_in_peaks:
+ fn: "xlinksites_in_peaks.tsv"
+ peaks_xlinksite_coverage:
+ fn: "peaks_xlinksite_coverage.tsv"
+ summary_type:
+ fn: "summary_type_metrics.tsv"
+ summary_subtype:
+ fn: '"summary_subtype_metrics.tsv'
+
custom_content:
order:
+ - clipqc
- software-versions-by-process
- software-versions-unique
-
# Customise the module search patterns to speed up execution time
-sp:
- samtools/stats:
- fn: "*.stats"
- samtools/flagstat:
- fn: "*.flagstat"
- samtools/idxstats:
- fn: "*.idxstats*"
+# sp:
+# samtools/stats:
+# fn: "*.stats"
+# samtools/flagstat:
+# fn: "*.flagstat"
+# samtools/idxstats:
+# fn: "*.idxstats*"
+# clipqc:
+# fn: "*.txt"
diff --git a/conf/modules.config b/conf/modules.config
index 5e0d3d5e..ee1e6f2f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -34,6 +34,7 @@ process {
========================================================================================
*/
+
if(params.run_genome_prep) {
process {
withName: '.*PREPARE_GENOME:GUNZIP_.*' {
@@ -71,7 +72,6 @@ if(params.run_genome_prep) {
path: { "${params.outdir}/00_genome" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- enabled: params.save_reference
]
}
@@ -157,6 +157,7 @@ if(params.run_genome_prep) {
}
}
+
/*
========================================================================================
PRE-PROCESSING
@@ -915,36 +916,6 @@ if(params.run_peakcalling && params.peakcaller.contains('icount')) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
- withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CONSENSUS_ICOUNTMINI_SIGXLS' {
- ext.args = { "${params.icount_sigxl_params}" }
- publishDir = [
- enabled: false
- ]
- }
-
- withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CONSENSUS_ICOUNTMINI_PEAKS' {
- ext.args = { "${params.icount_peaks_params}" }
- publishDir = [
- enabled: false
- ]
- }
-
- withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CONSENSUS_GUNZIP_ICOUNTMINI_SIGXLS' {
- publishDir = [
- path: { "${params.outdir}/05_peakcalling/icountmini" },
- mode: "${params.publish_dir_mode}",
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
- withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CONSENSUS_GUNZIP_ICOUNTMINI_PEAKS' {
- publishDir = [
- path: { "${params.outdir}/05_peakcalling/icountmini" },
- mode: "${params.publish_dir_mode}",
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
}
}
@@ -1066,13 +1037,13 @@ if(params.run_reporting) {
]
}
- // withName: 'CLIPSEQ:CLIPSEQ_CLIPQC' {
- // publishDir = [
- // path: { "${params.outdir}/06_reports/clipqc" },
- // mode: "${params.publish_dir_mode}",
- // saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- // ]
- // }
+ withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CLIPQC' {
+ publishDir = [
+ path: { "${params.outdir}/06_reports/clipqc" },
+ mode: "${params.publish_dir_mode}",
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
withName: 'NFCORE_CLIPSEQ:CLIPSEQ:MULTIQC' {
ext.args = params.multiqc_title ? "-v --title \"$params.multiqc_title\"" : '-v'
diff --git a/conf/test.config b/conf/test.config
index 2b5811f7..6a241ddf 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -54,3 +54,4 @@ params {
// Pipeline params
umitools_bc_pattern = 'NNNNNNNNN'
}
+
diff --git a/modules.json b/modules.json
index b51c411a..5e0e1025 100644
--- a/modules.json
+++ b/modules.json
@@ -70,6 +70,11 @@
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
"installed_by": ["modules"]
},
+ "deseq2/differential": {
+ "branch": "master",
+ "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+ "installed_by": ["modules"]
+ },
"fastqc": {
"branch": "master",
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
diff --git a/modules/local/bedgraph_strand_split/main.nf b/modules/local/bedgraph_strand_split/main.nf
index 5888a035..d611d0f3 100644
--- a/modules/local/bedgraph_strand_split/main.nf
+++ b/modules/local/bedgraph_strand_split/main.nf
@@ -1,7 +1,7 @@
// modules/local/bedgraph_strand_split/main.nf
process BEDGRAPH_STRAND_SPLIT {
tag "$meta.id"
- label 'process_low'
+ label 'process_medium'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/local/clipqc/main.nf b/modules/local/clipqc/main.nf
index 3c9e81e7..0fc14820 100644
--- a/modules/local/clipqc/main.nf
+++ b/modules/local/clipqc/main.nf
@@ -12,6 +12,10 @@ process CLIPQC {
path("icount/*")
path("paraclu/*")
path("clippy/*")
+ path("pureclip/*")
+ path("summary_type/*")
+ path("summary_subtype/*")
+ path("summary_gene/*")
output:
path "*.tsv" , emit: tsv
diff --git a/modules/local/clipqc/templates/clipqc.py b/modules/local/clipqc/templates/clipqc.py
index f265983f..122f714c 100644
--- a/modules/local/clipqc/templates/clipqc.py
+++ b/modules/local/clipqc/templates/clipqc.py
@@ -49,6 +49,7 @@ def main(process_name):
ncrna["ncrna_reads"].append(input_reads - output_reads)
ncrna_df = pd.DataFrame(ncrna)
+ print(ncrna_df)
# Next get STAR logs
star_logs = sorted(["mapped/" + f for f in os.listdir("mapped") if f.endswith(".Log.final.out")])
@@ -96,7 +97,7 @@ def main(process_name):
for dedup_log in dedup_logs:
with open(dedup_log, "r") as logfile:
- exp = re.sub(".log", "", os.path.basename(dedup_log))
+ exp = re.sub(".genomeUnique.dedup_UMICollapse.log", "", os.path.basename(dedup_log))
lines = logfile.readlines()
@@ -149,26 +150,26 @@ def read_bed(filename):
# First get xlink bed files
xlinks_files = sorted(["xlinks/" + f for f in os.listdir("xlinks") if f.endswith(".bed")])
- xlinks = dict((key, []) for key in ["exp", "total_xlinks", "total_xlinksites", "ratio"])
+ xlinks = dict((key, []) for key in ["exp", "number_of_cDNAs", "total_positions_of_crosslinks", "ratio"])
for xlinks_file in xlinks_files:
xlinks_df = read_bed(xlinks_file)
- exp = re.sub(".bed", "", os.path.basename(xlinks_file))
- total_xlinks = xlinks_df["score"].sum()
- total_xlinksites = xlinks_df.shape[0]
- ratio = total_xlinks / total_xlinksites
+ exp = re.sub(".genome.xl.bed", "", os.path.basename(xlinks_file))
+ number_of_cDNAs = xlinks_df["score"].sum()
+ total_positions_of_crosslinks = xlinks_df.shape[0]
+ ratio = number_of_cDNAs / total_positions_of_crosslinks
xlinks["exp"].append(exp)
- xlinks["total_xlinks"].append(total_xlinks)
- xlinks["total_xlinksites"].append(total_xlinksites)
- xlinks["ratio"].append(round(total_xlinks / total_xlinksites, 2))
+ xlinks["number_of_cDNAs"].append(number_of_cDNAs)
+ xlinks["total_positions_of_crosslinks"].append(total_positions_of_crosslinks)
+ xlinks["ratio"].append(round(number_of_cDNAs / total_positions_of_crosslinks, 2))
xlinks_metrics_df = pd.DataFrame(xlinks)
xlinks_metrics_df.to_csv("xlinks_metrics.tsv", sep="\t", index=False)
# Subset for MultiQC plots
- xlinks_metrics_df.loc[:, ["exp", "total_xlinks", "total_xlinksites"]].to_csv(
+ xlinks_metrics_df.loc[:, ["exp", "number_of_cDNAs", "total_positions_of_crosslinks"]].to_csv(
"xlinks_counts.tsv", sep="\t", index=False
)
xlinks_metrics_df.loc[:, ["exp", "ratio"]].to_csv("xlinks_ratio.tsv", sep="\t", index=False)
@@ -176,11 +177,78 @@ def read_bed(filename):
print(xlinks_metrics_df)
print("\n\n")
+ # ==========
+ # Summary cDNA
+ # ==========
+ summary_type_files = sorted(["summary_type/" + f for f in os.listdir("summary_type") if f.endswith(".tsv")])
+ summary_data = []
+
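+    # Pivot the per-experiment summaries into wide format (exp plus one column per RNA type) for MultiQC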
+ for summary_type_file in summary_type_files:
+
+ exp = re.sub(".summary_type_premapadjusted.tsv", "", os.path.basename(summary_type_file))
+ df = pd.read_csv(summary_type_file, sep="\t")
+
+ exp_data = {"exp": exp}
+
+ # Add percentage for each RNA type
+ for _, row in df.iterrows():
+ rna_type = row["Type"]
+ exp_data[rna_type] = row["cDNA %"]
+
+ summary_data.append(exp_data)
+
+ summary_df = pd.DataFrame(summary_data)
+ desired_columns = ["exp", "CDS", "ncRNA", "intergenic", "premapped RNA"]
+ existing_cols = [col for col in desired_columns if col in summary_df.columns]
+ summary_df = summary_df[existing_cols]
+ summary_df.to_csv("summary_type_metrics.tsv", sep="\t", index=False)
+
+ # ==========
+ # Summary subtype cDNA
+ # ==========
+
+ summary_subtype_files = sorted(
+ ["summary_subtype/" + f for f in os.listdir("summary_subtype") if f.endswith(".tsv")]
+ )
+ summary_subtypedata = []
+
+ for summary_subtype_file in summary_subtype_files:
+
+ exp = re.sub(".summary_subtype_premapadjusted.tsv", "", os.path.basename(summary_subtype_file))
+ df = pd.read_csv(summary_subtype_file, sep="\t")
+
+ exp_data = {"exp": exp}
+
+ # Add percentage for each RNA type
+ for _, row in df.iterrows():
+ rna_type = row["Subtype"]
+ exp_data[rna_type] = row["cDNA %"]
+
+ summary_subtypedata.append(exp_data)
+
+ summary_subtype_df = pd.DataFrame(summary_subtypedata)
+ desired_columns = [
+ "exp",
+ "CDS mRNA",
+ "ncRNA rRNA",
+ "ncRNA snRNA",
+ "ncRNA snoRNA",
+ "ncRNA tRNA",
+ "ncRNA ncRNA",
+ "intergenic",
+ "premapped RNA",
+ ]
+ existing_cols = [col for col in desired_columns if col in summary_subtype_df.columns]
+ summary_subtype_df = summary_subtype_df[existing_cols]
+ summary_subtype_df.to_csv("summary_subtype_metrics.tsv", sep="\t", index=False)
+
# ==========
# Peaks
# ==========
- peakcallers = ["icount", "paraclu", "clippy"]
+ peakcallers = ["icount", "paraclu", "clippy", "pureclip"]
def get_peaks_metrics(peakcaller):
peak_files = sorted([peakcaller + "/" + f for f in os.listdir(peakcaller) if f.endswith(".bed")])
@@ -209,13 +277,15 @@ def get_peaks_metrics(peakcaller):
if peakcaller == "icount":
exp = re.sub(".peaks.bed", "", os.path.basename(peak_file))
elif peakcaller == "paraclu":
- exp = re.sub(".peaks.bed", "", os.path.basename(peak_file))
+ exp = re.sub(".genome.peaks.clustered.simplified.bed", "", os.path.basename(peak_file))
elif peakcaller == "clippy":
exp = re.sub(
- "_rollmean.+_stdev.+_minGeneCount.+.bed",
+ ".genome.peaks_rollmean10_minHeightAdjust1.0_minPromAdjust1.0_minGeneCount5_Peaks.bed",
"",
os.path.basename(peak_file),
)
+ elif peakcaller == "pureclip":
+ exp = re.sub("_pureclip.peaks.bed", "", os.path.basename(peak_file))
xlinks_df = read_bed(xlinks_files[0])
expanded_xlinks_df = xlinks_df.loc[xlinks_df.index.repeat(xlinks_df.score)].reset_index(drop=True)
diff --git a/modules/local/consensus_count_table/main.nf b/modules/local/consensus_count_table/main.nf
index 803f5af4..3515b2e8 100644
--- a/modules/local/consensus_count_table/main.nf
+++ b/modules/local/consensus_count_table/main.nf
@@ -1,6 +1,6 @@
process GET_CONSENSUS_COUNTS {
tag "${meta.id}"
- label "process_single"
+ label "process_high"
conda "bioconda::bioconductor-rtracklayer=1.62.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -13,7 +13,7 @@ process GET_CONSENSUS_COUNTS {
val(output_name)
output:
- path "*.tsv" , emit: tsv
+ tuple val(meta), path ("*.tsv") , emit: tsv
path "versions.yml" , emit: versions
when:
diff --git a/modules/local/deseq2_qc/environment.yml b/modules/local/deseq2_qc/environment.yml
new file mode 100644
index 00000000..40708368
--- /dev/null
+++ b/modules/local/deseq2_qc/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::bioconductor-deseq2=1.34.0
diff --git a/modules/local/deseq2_qc/main.nf b/modules/local/deseq2_qc/main.nf
new file mode 100644
index 00000000..8f52f7d2
--- /dev/null
+++ b/modules/local/deseq2_qc/main.nf
@@ -0,0 +1,66 @@
+process DESEQ2_QC {
+ tag "$meta.id"
+ label "process_single"
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' :
+ 'biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }"
+
+ input:
+ tuple val(meta), path(counts)
+ path deseq2_pca_header
+ path deseq2_clustering_header
+
+ output:
+ path "*.pdf" , optional:true, emit: pdf
+ path "*.RData" , optional:true, emit: rdata
+ path "*.rds" , optional:true, emit: rds
+ path "*pca.vals.txt" , optional:true, emit: pca_txt
+ path "*pca.vals_mqc.tsv" , optional:true, emit: pca_multiqc
+ path "*sample.dists.txt" , optional:true, emit: dists_txt
+ path "*sample.dists_mqc.tsv", optional:true, emit: dists_multiqc
+ path "*.log" , optional:true, emit: log
+ path "size_factors" , optional:true, emit: size_factors
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ //template 'deseq2_qc.R'
+ """
+ Rscript $projectDir/modules/local/deseq2_qc/templates/deseq2_qc.R \
+ -i ${counts} \
+ -c 3 \
+ -d "PeakID" \
+ -o ${prefix} \
+ --vst
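+
+    # NOTE (sketch): prepend the MultiQC headers passed as inputs so the optional *_mqc.tsv outputs
+    # declared above are actually produced, following the approach of the nf-core/rnaseq DESEQ2_QC
+    # module. Assumes the R script writes ${prefix}.pca.vals.txt and ${prefix}.sample.dists.txt
+    # (it does when there are enough samples).
+    if [ -f "${prefix}.pca.vals.txt" ]; then
+        cat $deseq2_pca_header ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv
+    fi
+    if [ -f "${prefix}.sample.dists.txt" ]; then
+        cat $deseq2_clustering_header ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv
+    fi
+
+    # NOTE (sketch): regenerate versions.yml here so the process name is expanded by Nextflow
+    # (the R script, run outside the template mechanism, writes an unexpanded placeholder).
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))")
+    END_VERSIONS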
+ """
+
+ stub:
+ """
+ touch ${meta.id}.pdf
+ touch ${meta.id}.RData
+ touch ${meta.id}.rds
+ touch ${meta.id}.pca.vals.txt
+ touch ${meta.id}.pca.vals_mqc.tsv
+ touch ${meta.id}.sample.dists.txt
+ touch ${meta.id}.sample.dists_mqc.tsv
+ touch ${meta.id}.log
+ path "size_factors"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))")
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/deseq2_qc/templates/deseq2_qc.R b/modules/local/deseq2_qc/templates/deseq2_qc.R
new file mode 100755
index 00000000..8aff2570
--- /dev/null
+++ b/modules/local/deseq2_qc/templates/deseq2_qc.R
@@ -0,0 +1,288 @@
+#!/usr/bin/env Rscript
+
+################################################
+################################################
+## REQUIREMENTS ##
+################################################
+################################################
+
+## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE
+## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS.
+## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R
+
+################################################
+################################################
+## LOAD LIBRARIES ##
+################################################
+################################################
+
+suppressPackageStartupMessages({
+ library(optparse)
+ library(DESeq2)
+ library(ggplot2)
+ library(RColorBrewer)
+ library(pheatmap)
+})
+
+################################################
+################################################
+## PARSE COMMAND-LINE PARAMETERS ##
+################################################
+################################################
+
+option_list <- list(
+ make_option(c("-i", "--count_file"), type="character", help="Count matrix"),
+ make_option(c("-c", "--count_col"), type="integer", default=3),
+ make_option(c("-d", "--id_col"), type="character", default="PeakID"),
+ make_option(c("-o", "--output_prefix"), type="character", default="deseq2_qc"),
+ make_option(c("--vst"), action="store_true", default=TRUE)
+)
+
+opt <- parse_args(OptionParser(option_list=option_list))
+
+################################################
+################################################
+## READ IN COUNTS FILE ##
+################################################
+
+count.table <- read.delim(file=opt$count_file,header=TRUE,check.names=FALSE, na.strings = c(".", "NA", ""))
+rownames(count.table) <- count.table[,opt$id_col]
+count.table <- count.table[,opt$count_col:ncol(count.table),drop=FALSE]
+
+count.table[is.na(count.table)] <- 0
+
+
+################################################
+################################################
+## RUN DESEQ2 ##
+################################################
+################################################
+
+
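+# Candidate grouping factors are derived by splitting sample names on "_"; groupings that are constant or unique per sample are dropped.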
+samples.vec <- colnames(count.table)
+name_components <- strsplit(samples.vec, "_")
+n_components <- length(name_components[[1]])
+decompose <- n_components!=1 && all(sapply(name_components, length)==n_components)
+coldata <- data.frame(samples.vec, sample=samples.vec, row.names=1)
+if (decompose) {
+ groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i)))
+ names(groupings) <- paste0("Group", 1:n_components)
+ n_distinct <- sapply(groupings, function(grp) length(unique(grp)))
+ groupings <- groupings[n_distinct!=1 & n_distinct!=length(samples.vec)]
+ if (ncol(groupings)!=0) {
+ coldata <- cbind(coldata, groupings)
+ } else {
+ decompose <- FALSE
+ }
+}
+
+DDSFile <- paste(opt$output_prefix,".dds.RData",sep="")
+
+counts <- count.table[,samples.vec,drop=FALSE]
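+# Intercept-only design (~ 1): DESeq2 is used here for size-factor estimation and variance-stabilised QC plots, not differential testing.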
+dds <- DESeqDataSetFromMatrix(countData=round(counts), colData=coldata, design=~ 1)
+dds <- estimateSizeFactors(dds)
+if (min(dim(count.table))<=1) { # No point if only one sample, or one gene
+ save(dds,file=DDSFile)
+ saveRDS(dds, file = paste(opt$output_prefix,"rds", sep = '.'))
+ warning("Not enough samples or genes in counts file for PCA.", call.=FALSE)
+ quit(save = "no", status = 0, runLast = FALSE)
+}
+if (!opt$vst) {
+ vst_name <- "rlog"
+ rld <- rlog(dds)
+} else {
+ vst_name <- "vst"
+ rld <- varianceStabilizingTransformation(dds)
+}
+
+assay(dds, vst_name) <- assay(rld)
+save(dds,file=DDSFile)
+saveRDS(dds, file = paste(opt$output_prefix, "rds", sep = '.'))
+
+################################################
+################################################
+## PLOT QC ##
+################################################
+################################################
+
+##' PCA pre-processor
+##'
+##' Generate all the necessary information to plot PCA from a DESeq2 object
+##' in which an assay containing a variance-stabilised matrix of counts is
+##' stored. Copied from DESeq2::plotPCA, but with additional ability to
+##' say which assay to run the PCA on.
+##'
+##' @param object The DESeq2DataSet object.
+##' @param ntop number of top genes to use for principal components, selected by highest row variance.
+##' @param assay the name or index of the assay that stores the variance-stabilised data.
+##' @return A data.frame containing the projected data alongside the grouping columns.
+##' A 'percentVar' attribute is set which includes the percentage of variation each PC explains,
+##' and additionally how much the variation within that PC is explained by the grouping variable.
+##' @author Gavin Kelly
+plotPCA_vst <- function (object, ntop = 500, assay=length(assays(object))) {
+ rv <- rowVars(assay(object, assay))
+ select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
+ pca <- prcomp(t(assay(object, assay)[select, ]), center=TRUE, scale=FALSE)
+ percentVar <- pca$sdev^2/sum(pca$sdev^2)
+ df <- cbind( as.data.frame(colData(object)), pca$x)
+ #Order points so extreme samples are more likely to get label
+ ord <- order(abs(rank(df$PC1)-median(df$PC1)), abs(rank(df$PC2)-median(df$PC2)))
+ df <- df[ord,]
+ attr(df, "percentVar") <- data.frame(PC=seq(along=percentVar), percentVar=100*percentVar)
+ return(df)
+}
+
+PlotFile <- paste(opt$output_prefix,".plots.pdf",sep="")
+
+pdf(file=PlotFile, onefile=TRUE, width=7, height=7)
+## PCA
+ntop <- c(500, Inf)
+for (n_top_var in ntop) {
+ pca.data <- plotPCA_vst(dds, assay=vst_name, ntop=n_top_var)
+ percentVar <- round(attr(pca.data, "percentVar")$percentVar)
+ plot_subtitle <- ifelse(n_top_var==Inf, "All peaks", paste("Top", n_top_var, "peaks"))
+ pl <- ggplot(pca.data, aes(PC1, PC2, label=paste0(" ", sample, " "))) +
+ geom_point() +
+ geom_text(check_overlap=TRUE, vjust=0.5, hjust="inward") +
+ xlab(paste0("PC1: ",percentVar[1],"% variance")) +
+ ylab(paste0("PC2: ",percentVar[2],"% variance")) +
+ labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) +
+ theme(legend.position="top",
+ panel.grid.major = element_blank(),
+ panel.grid.minor = element_blank(),
+ panel.background = element_blank(),
+ panel.border = element_rect(colour = "black", fill=NA, size=1))
+ print(pl)
+
+ if (decompose) {
+ pc_names <- paste0("PC", attr(pca.data, "percentVar")$PC)
+ long_pc <- reshape(pca.data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow")
+ long_pc <- subset(long_pc, component<=5)
+ long_pc_grp <- reshape(long_pc, varying=names(groupings), direction="long", sep="", timevar="grouper")
+ long_pc_grp <- subset(long_pc_grp, grouper<=5)
+ long_pc_grp$component <- paste("PC", long_pc_grp$component)
+ long_pc_grp$grouper <- paste0(long_pc_grp$grouper, c("st","nd","rd","th","th")[long_pc_grp$grouper], " prefix")
+ pl <- ggplot(long_pc_grp, aes(x=Group, y=PC)) +
+ geom_point() +
+ stat_summary(fun=mean, geom="line", aes(group = 1)) +
+ labs(x=NULL, y=NULL, subtitle = plot_subtitle, title="PCs split by sample-name prefixes") +
+ facet_grid(component~grouper, scales="free_x") +
+ scale_x_discrete(guide = guide_axis(n.dodge = 3))
+ print(pl)
+ }
+} # at end of loop, we'll be using the user-defined ntop if any, else all genes
+
+## WRITE PC1 vs PC2 VALUES TO FILE
+pca.vals <- pca.data[,c("PC1","PC2")]
+colnames(pca.vals) <- paste0(colnames(pca.vals), ": ", percentVar[1:2], '% variance')
+pca.vals <- cbind(sample = rownames(pca.vals), pca.vals)
+write.table(pca.vals, file = paste(opt$output_prefix, ".pca.vals.txt", sep=""),
+ row.names = FALSE, col.names = TRUE, sep = "\t", quote = TRUE)
+
+## SAMPLE CORRELATION HEATMAP
+sampleDists <- dist(t(assay(dds, vst_name)))
+sampleDistMatrix <- as.matrix(sampleDists)
+colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
+pheatmap(
+ sampleDistMatrix,
+ clustering_distance_rows=sampleDists,
+ clustering_distance_cols=sampleDists,
+ col=colors,
+ main=paste("Euclidean distance between", vst_name, "of samples")
+)
+
+## WRITE SAMPLE DISTANCES TO FILE
+write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix),file=paste(opt$output_prefix, ".sample.dists.txt", sep=""),
+ row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE)
+dev.off()
+
+################################################
+################################################
+## SAVE SIZE FACTORS ##
+################################################
+################################################
+
+SizeFactorsDir <- "size_factors/"
+if (file.exists(SizeFactorsDir) == FALSE) {
+ dir.create(SizeFactorsDir, recursive=TRUE)
+}
+
+NormFactorsFile <- paste(SizeFactorsDir,opt$output_prefix, ".size_factors.RData", sep="")
+
+normFactors <- sizeFactors(dds)
+save(normFactors, file=NormFactorsFile)
+
+for (name in names(sizeFactors(dds))) {
+ sizeFactorFile <- paste(SizeFactorsDir,name, ".size_factors.txt", sep="")
+ write(as.numeric(sizeFactors(dds)[name]), file=sizeFactorFile)
+}
+
+################################################
+################################################
+## R SESSION INFO ##
+################################################
+################################################
+
+RLogFile <- "R_sessionInfo.log"
+
+sink(RLogFile)
+a <- sessionInfo()
+print(a)
+sink()
+
+################################################
+################################################
+## VERSIONS FILE ##
+################################################
+################################################
+
+r.version <- paste(R.version[['major']],R.version[['minor']], sep = ".")
+deseq2.version <- as.character(packageVersion('DESeq2'))
+
+writeLines(
+ c(
+ '"${task.process}":',
+ paste(' r-base:', r.version),
+ paste(' bioconductor-deseq2:', deseq2.version)
+ ),
+'versions.yml')
+
+################################################
+################################################
+################################################
+################################################
\ No newline at end of file
diff --git a/modules/nf-core/peka/main.nf b/modules/nf-core/peka/main.nf
index 416a44a8..84256428 100644
--- a/modules/nf-core/peka/main.nf
+++ b/modules/nf-core/peka/main.nf
@@ -1,6 +1,6 @@
process PEKA {
tag "$meta.id"
- label 'process_low'
+ label 'process_high'
conda "bioconda::peka=1.0.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf
index 81cdee95..37a201f5 100644
--- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf
+++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf
@@ -1,6 +1,6 @@
process UCSC_BEDGRAPHTOBIGWIG {
tag "$meta.id"
- label 'process_single'
+ label 'process_medium'
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda "${moduleDir}/environment.yml"
diff --git a/nextflow.config b/nextflow.config
index 83861b5f..71319c7e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,6 +54,7 @@ params {
skip_umi_extract = true
skip_trimming = false
skip_transcriptome = true
+ skip_deseq2_qc = false
// Output params
save_reference = false
diff --git a/subworkflows/local/consensus_peak_table.nf b/subworkflows/local/consensus_peak_table.nf
index aec66902..e0a747ff 100644
--- a/subworkflows/local/consensus_peak_table.nf
+++ b/subworkflows/local/consensus_peak_table.nf
@@ -5,6 +5,7 @@
//
// MODULES
//
+include { DESEQ2_QC } from '../../modules/local/deseq2_qc'
include { BEDTOOLS_SORT as CONSENSUS_PEAKS_SORT } from '../../modules/nf-core/bedtools/sort/main'
include { BEDTOOLS_SORT as CROSSLINKS_SORT } from '../../modules/nf-core/bedtools/sort/main'
include { BEDTOOLS_MAP as CONSENSUS_MAP } from '../../modules/nf-core/bedtools/map/main'
@@ -18,9 +19,13 @@ workflow CONSENSUS_PEAK_TABLE {
take:
all_crosslinks // channel: [ val(meta), [ xl.bed ] ]
consensus_peaks // channel: [ val(meta), [ peaks.bed ] ]
+ genome_fasta
genome_fai // channel: [ val(meta), [ index ] ]
gtf // channel: [ val(meta), [ gtf ] ]
table_name // val "Clippy_Consensus_Counts.tsv" must end in tsv
+ ch_deseq2_pca_header_multiqc
+ ch_deseq2_clustering_header_multiqc
+ skip_deseq2_qc
main:
ch_versions = Channel.empty()
@@ -45,7 +50,7 @@ workflow CONSENSUS_PEAK_TABLE {
CONSENSUS_MAP (
ch_consensus_map,
- genome_fai
+ genome_fai,
)
CONSENSUS_MAP.out.mapped
@@ -59,7 +64,48 @@ workflow CONSENSUS_PEAK_TABLE {
table_name
)
+ //
+ // Generate QC plots with DESeq2
+ //
+ ch_deseq2_qc_pdf = Channel.empty()
+ ch_deseq2_qc_rdata = Channel.empty()
+ ch_deseq2_qc_rds = Channel.empty()
+ ch_deseq2_qc_pca_txt = Channel.empty()
+ ch_deseq2_qc_pca_multiqc = Channel.empty()
+ ch_deseq2_qc_dists_txt = Channel.empty()
+ ch_deseq2_qc_dists_multiqc = Channel.empty()
+ ch_deseq2_qc_log = Channel.empty()
+ ch_deseq2_qc_size_factors = Channel.empty()
+ if (!skip_deseq2_qc) {
+ DESEQ2_QC (
+ GET_CONSENSUS_COUNTS.out.tsv,
+ ch_deseq2_pca_header_multiqc,
+ ch_deseq2_clustering_header_multiqc
+ )
+ ch_deseq2_qc_pdf = DESEQ2_QC.out.pdf
+ ch_deseq2_qc_rdata = DESEQ2_QC.out.rdata
+ ch_deseq2_qc_rds = DESEQ2_QC.out.rds
+ ch_deseq2_qc_pca_txt = DESEQ2_QC.out.pca_txt
+ ch_deseq2_qc_pca_multiqc = DESEQ2_QC.out.pca_multiqc
+ ch_deseq2_qc_dists_txt = DESEQ2_QC.out.dists_txt
+ ch_deseq2_qc_dists_multiqc = DESEQ2_QC.out.dists_multiqc
+ ch_deseq2_qc_log = DESEQ2_QC.out.log
+ ch_deseq2_qc_size_factors = DESEQ2_QC.out.size_factors
+ ch_versions = ch_versions.mix(DESEQ2_QC.out.versions)
+ }
+
emit:
- mapped_table = GET_CONSENSUS_COUNTS.out.tsv // channel: [ val(meta), [ tsv ] ]
+ mapped_table = GET_CONSENSUS_COUNTS.out.tsv // channel: [ val(meta), [ tsv ] ]
+
+ deseq2_qc_pdf = ch_deseq2_qc_pdf // channel: [ pdf ]
+ deseq2_qc_rdata = ch_deseq2_qc_rdata // channel: [ rdata ]
+ deseq2_qc_rds = ch_deseq2_qc_rds // channel: [ rds ]
+ deseq2_qc_pca_txt = ch_deseq2_qc_pca_txt // channel: [ txt ]
+ deseq2_qc_pca_multiqc = ch_deseq2_qc_pca_multiqc // channel: [ txt ]
+ deseq2_qc_dists_txt = ch_deseq2_qc_dists_txt // channel: [ txt ]
+ deseq2_qc_dists_multiqc = ch_deseq2_qc_dists_multiqc // channel: [ txt ]
+ deseq2_qc_log = ch_deseq2_qc_log // channel: [ txt ]
+ deseq2_qc_size_factors = ch_deseq2_qc_size_factors // channel: [ txt ]
versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index ae509397..606e5f0c 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -117,7 +117,7 @@ workflow PREPARE_GENOME {
}
}
else {
- ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta ).index
+ ch_bt_index = BOWTIE_BUILD ( ch_ncrna_fasta).index
ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions)
}
diff --git a/subworkflows/local/rna_align.nf b/subworkflows/local/rna_align.nf
index a6e00c3b..d5ea9dd4 100644
--- a/subworkflows/local/rna_align.nf
+++ b/subworkflows/local/rna_align.nf
@@ -229,4 +229,4 @@ workflow RNA_ALIGN {
transcript_multi_bai = ch_transcript_multi_bai // channel: [ val(meta), [ bai ] ]
versions = ch_versions // channel: [ versions.yml ]
-}
+}
\ No newline at end of file
diff --git a/workflows/clipseq.nf b/workflows/clipseq.nf
index a3078633..8e0a17e2 100644
--- a/workflows/clipseq.nf
+++ b/workflows/clipseq.nf
@@ -56,6 +56,10 @@ if ((caller_list + callers).unique().size() != caller_list.size()) {
ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)
ch_dummy_file2 = file("$projectDir/assets/dummy_file2.txt", checkIfExists: true)
+// deseq2 header files:
+ch_multiqc_merged_replicate_deseq2_pca_header = file("$projectDir/assets/merged_replicate_deseq2_pca_header.txt", checkIfExists: true)
+ch_multiqc_merged_replicate_deseq2_clustering_header = file("$projectDir/assets/merged_replicate_deseq2_clustering_header.txt", checkIfExists: true)
+
// // Check if an AWS iGenome has been provided to use the appropriate version of STAR
// def is_aws_igenome = false
// if (params.fasta && params.gtf) {
@@ -133,10 +137,6 @@ include { ICOUNTMINI_PEAKS } from "../m
include { GUNZIP as GUNZIP_ICOUNTMINI_SIGXLS } from "../modules/nf-core/gunzip/main"
include { GUNZIP as GUNZIP_ICOUNTMINI_PEAKS } from "../modules/nf-core/gunzip/main"
-include { ICOUNTMINI_SIGXLS as CONSENSUS_ICOUNTMINI_SIGXLS } from "../modules/nf-core/icountmini/sigxls/main"
-include { ICOUNTMINI_PEAKS as CONSENSUS_ICOUNTMINI_PEAKS } from "../modules/nf-core/icountmini/peaks/main"
-include { GUNZIP as CONSENSUS_GUNZIP_ICOUNTMINI_SIGXLS } from "../modules/nf-core/gunzip/main"
-include { GUNZIP as CONSENSUS_GUNZIP_ICOUNTMINI_PEAKS } from "../modules/nf-core/gunzip/main"
include { PARACLU as PARACLU_GENOME } from "../modules/nf-core/paraclu/main"
include { PARACLU as PARACLU_GENOME_CONSENSUS } from "../modules/nf-core/paraclu/main"
@@ -251,6 +251,7 @@ workflow CLIPSEQ {
ch_ncrna_genome_index = PREPARE_GENOME.out.ncrna_index.collect()
}
+
//
// SUBWORKFLOW: Read in samplesheet, validate, stage input files and merge replicates
//
@@ -267,7 +268,7 @@ workflow CLIPSEQ {
// ch_fastq | view
//
- // SUBWORKFLOW: Extract UMI, trim and run b4 and after fastqc
+    // SUBWORKFLOW: Extract UMI, trim and run FastQC before and after trimming
//
if(params.source == "fastq" & params.run_preprocessing) {
FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
@@ -366,7 +367,7 @@ workflow CLIPSEQ {
ch_versions = ch_versions.mix(GENOME_UNIQUE_DEDUP.out.versions)
ch_genome_unique_dedupe_bam = GENOME_UNIQUE_DEDUP.out.bam
ch_genome_unique_dedupe_bai = GENOME_UNIQUE_DEDUP.out.bai
- //ch_umi_log = GENOME_UNIQUE_DEDUP.out.umi_log
+ ch_umi_log = GENOME_UNIQUE_DEDUP.out.umi_log
GENOME_MULTI_DEDUP (
ch_genome_multi_bam_bai
@@ -440,6 +441,10 @@ workflow CLIPSEQ {
ch_merged_summaries
)
ch_versions = ch_versions.mix(MERGE_SUMMARY.out.versions)
+ ch_merged_summary_type = MERGE_SUMMARY.out.summary_type_adjusted
+ ch_merged_summary_subtype = MERGE_SUMMARY.out.summary_subtype_adjusted
+ ch_merged_summary_gene = MERGE_SUMMARY.out.summary_gene_adjusted
+
ICOUNTMINI_METAGENE (
ch_genome_crosslink_group_resolved_bed,
@@ -492,6 +497,15 @@ workflow CLIPSEQ {
ch_icountmini_sigxls = Channel.empty()
ch_paraclu_genome_peaks = Channel.empty()
+ //
+ // PEKA CHANNELS
+ //
+ ch_peka_icountmini_peaks = Channel.empty()
+ ch_peka_clippy_peaks = Channel.empty()
+ ch_peka_paraclu_peaks = Channel.empty()
+ ch_peka_pureclip_peaks = Channel.empty()
+
+
if(params.run_peakcalling) {
if('clippy' in callers) {
@@ -514,9 +528,13 @@ workflow CLIPSEQ {
CLIPPY_CONSENSUS_PEAK_TABLE (
ch_all_crosslinks,
CLIPPY_GENOME_CONSENSUS.out.peaks,
+ ch_fasta,
ch_fasta_fai,
ch_regions_used,
- "Clippy_Consensus_AllCounts.tsv"
+ "Clippy_Consensus_AllCounts.tsv",
+ ch_multiqc_merged_replicate_deseq2_pca_header,
+ ch_multiqc_merged_replicate_deseq2_clustering_header,
+ params.skip_deseq2_qc
)
ch_versions = ch_versions.mix(CLIPPY_CONSENSUS_PEAK_TABLE.out.versions)
}
@@ -573,37 +591,6 @@ workflow CLIPSEQ {
ch_versions = ch_versions.mix(GUNZIP_ICOUNTMINI_PEAKS.out.versions)
ch_icountmini_peaks = GUNZIP_ICOUNTMINI_PEAKS.out.gunzip
- if(params.consensus_peak){
- CONSENSUS_ICOUNTMINI_SIGXLS (
- ch_consensus_crosslinks_final_bed,
- ch_seg_gtf.collect{ it[1]}
- )
- // CHANNEL: Create combined channel of input crosslinks and sigxls
- ch_consensus_peaks_input = ch_consensus_crosslinks_final_bed
- .map{ [ it[0].id, it[0], it[1] ] }
- .join( CONSENSUS_ICOUNTMINI_SIGXLS.out.sigxls.map{ [ it[0].id, it[0], it[1] ] } )
- .map { [ it[1], it[2], it[4] ] }
- //EXAMPLE CHANNEL STRUCT: [ [id:test], BED(crosslinks), BED(sigxls) ]
- CONSENSUS_ICOUNTMINI_PEAKS (
- ch_consensus_peaks_input
- )
- ch_consensus_peaks = CONSENSUS_ICOUNTMINI_PEAKS.out.peaks
- CONSENSUS_GUNZIP_ICOUNTMINI_SIGXLS (
- CONSENSUS_ICOUNTMINI_SIGXLS.out.sigxls
- )
- CONSENSUS_GUNZIP_ICOUNTMINI_PEAKS (
- CONSENSUS_ICOUNTMINI_PEAKS.out.peaks
- )
- ICOUNT_CONSENSUS_PEAK_TABLE (
- ch_all_crosslinks,
- CONSENSUS_GUNZIP_ICOUNTMINI_PEAKS.out.gunzip,
- ch_fasta_fai,
- ch_regions_used,
- "iCount-Mini_Consensus_AllCounts.tsv"
- )
- ch_versions = ch_versions.mix(ICOUNT_CONSENSUS_PEAK_TABLE.out.versions)
- }
-
if(params.run_peka) {
PEKA_ICOUNT (
ch_icountmini_peaks,
@@ -638,9 +625,13 @@ workflow CLIPSEQ {
PARACLU_CONSENSUS_PEAK_TABLE (
ch_all_crosslinks,
PARACLU_GENOME_CONSENSUS.out.bed,
+ ch_fasta,
ch_fasta_fai,
ch_regions_used,
- "Paraclu_Consensus_AllCounts.tsv"
+ "Paraclu_Consensus_AllCounts.tsv",
+ ch_multiqc_merged_replicate_deseq2_pca_header,
+ ch_multiqc_merged_replicate_deseq2_clustering_header,
+ params.skip_deseq2_qc
)
ch_versions = ch_versions.mix(PARACLU_CONSENSUS_PEAK_TABLE.out.versions)
}
@@ -755,26 +746,34 @@ workflow CLIPSEQ {
}
if(params.run_reporting) {
- //
+
// MODULE: Collect software versions
- //
- DUMP_SOFTWARE_VERSIONS (
- ch_versions.unique().collectFile()
- )
-
- //
- // MODULE: Run clipqc
- //
- // CLIPSEQ_CLIPQC (
- // ch_bt_log.map{ it[1] },
- // ch_star_log.map{ it[1] },
- // ch_umi_log.map{ it[1] },
- // ch_genome_crosslink_bed.map{ it[1] },
- // ICOUNT_ANALYSE.out.bed_peaks.map{ it[1] },
- // PARACLU_ANALYSE_GENOME.out.peaks.map{ it[1] },
- // CLIPPY_GENOME.out.peaks.map{ it[1] }
+
+ // DUMP_SOFTWARE_VERSIONS (
+ // ch_versions.unique().collectFile()
// )
+
+ // MODULE: Run clipqc
+
+        CLIPQC (
+            ch_ncrna_log.collect{ it[1] },
+            ch_genome_log.collect{ it[1] },
+            ch_umi_log.collect{ it[1] },
+            ch_genome_crosslink_group_resolved_bed.collect{ it[1] },
+            ch_icountmini_peaks.collect{ it[1] },
+            ch_paraclu_genome_peaks.collect{ it[1] },
+            ch_clippy_genome_peaks.collect{ it[1] },
+            ch_pureclip_genome_peaks.collect{ it[1] },
+            ch_merged_summary_type.collect{ it[1] },
+            ch_merged_summary_subtype.collect{ it[1] },
+            ch_merged_summary_gene.collect{ it[1] }
+        )
+
//
// MODULE: Run multiqc
//
@@ -787,14 +786,17 @@ workflow CLIPSEQ {
ch_multiqc_files = Channel.empty()
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(DUMP_SOFTWARE_VERSIONS.out.mqc_yml.collect())
- ch_multiqc_files = ch_multiqc_files.mix(DUMP_SOFTWARE_VERSIONS.out.mqc_unique_yml.collect())
-
+ //ch_multiqc_files = ch_multiqc_files.mix(DUMP_SOFTWARE_VERSIONS.out.mqc_yml.collect())
+ //ch_multiqc_files = ch_multiqc_files.mix(DUMP_SOFTWARE_VERSIONS.out.mqc_unique_yml.collect())
+
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(ch_ncrna_log.collect{it[1]}.ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(ch_genome_log.collect{it[1]}.ifEmpty([]))
+ ch_multiqc_files = ch_multiqc_files.mix(CLIPQC.out.tsv.collect().ifEmpty([]))
MULTIQC (
ch_multiqc_files.collect(),