goodwright · hanzhong-bai · Oct 7, 2024 · Oct 15, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.nf-test.log b/.nf-test.log
@@ -0,0 +1,72 @@
+Oct-10 10:26:30.050 [main] INFO  com.askimed.nf.test.App - nf-test 0.9.0
+Oct-10 10:26:30.116 [main] INFO  com.askimed.nf.test.App - Arguments: [test, /nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq/tests/subworkflows/local/prep_whole_read/main.nf.test, --profile, singularity]
+Oct-10 10:26:31.962 [main] INFO  com.askimed.nf.test.App - Nextflow Version: 24.04.4
+Oct-10 10:26:31.965 [main] INFO  com.askimed.nf.test.commands.RunTestsCommand - Load config from file /nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq/nf-test.config...
+Oct-10 10:26:37.832 [main] INFO  com.askimed.nf.test.lang.dependencies.DependencyResolver - Loaded 81 files from directory /nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq in 5.544 sec
+Oct-10 10:26:37.836 [main] INFO  com.askimed.nf.test.lang.dependencies.DependencyResolver - Found 1 tests.
+Oct-10 10:26:37.837 [main] DEBUG com.askimed.nf.test.lang.dependencies.DependencyResolver - Found tests: [/nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq/tests/subworkflows/local/prep_whole_read/main.nf.test]
+Oct-10 10:26:37.837 [main] INFO  com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files.
+Oct-10 10:26:37.977 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Started test plan
+Oct-10 10:26:37.977 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Running testsuite 'Test Workflow PREP_WHOLE_READ' from file '/nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq/tests/subworkflows/local/prep_whole_read/main.nf.test'.
+Oct-10 10:26:37.978 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Run test '9de90743: genome whole read analysis'. type: com.askimed.nf.test.lang.workflow.WorkflowTest
+Oct-10 11:12:39.741 [main] DEBUG com.askimed.nf.test.lang.extensions.SnapshotFile - Load snapshots from file '/nemo/lab/ulej/home/users/baih/PM24162/goodwright_clipseq/clipseq/tests/subworkflows/local/prep_whole_read/main.nf.test.snap'
+Oct-10 11:12:40.908 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Test '9de90743: genome whole read analysis' finished. status: FAILED
+java.lang.OutOfMemoryError: Required array size too large
+	at java.base/java.nio.file.Files.readAllBytes(Files.java:3294)
+	at com.askimed.nf.test.util.FileUtil.getMd5(FileUtil.java:114)
+	at com.askimed.nf.test.lang.extensions.PathExtension.getMd5(PathExtension.java:21)
+	at com.askimed.nf.test.lang.extensions.util.PathConverter.serializeFile(PathConverter.java:57)
+	at com.askimed.nf.test.lang.extensions.util.PathConverter.convert(PathConverter.java:42)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:185)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:164)
+	at groovy.json.DefaultJsonGenerator.writeIterator(DefaultJsonGenerator.java:396)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:202)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:164)
+	at groovy.json.DefaultJsonGenerator.writeIterator(DefaultJsonGenerator.java:396)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:202)
+	at groovy.json.DefaultJsonGenerator.writeMapEntry(DefaultJsonGenerator.java:381)
+	at groovy.json.DefaultJsonGenerator.writeMap(DefaultJsonGenerator.java:369)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:200)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:164)
+	at groovy.json.DefaultJsonGenerator.writeIterator(DefaultJsonGenerator.java:396)
+	at groovy.json.DefaultJsonGenerator.writeArray(DefaultJsonGenerator.java:279)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:221)
+	at groovy.json.DefaultJsonGenerator.writeObject(DefaultJsonGenerator.java:164)
+	at groovy.json.DefaultJsonGenerator.toJson(DefaultJsonGenerator.java:98)
+	at com.askimed.nf.test.lang.extensions.SnapshotFileItem.toString(SnapshotFileItem.java:80)
+	at com.askimed.nf.test.lang.extensions.SnapshotFileItem.equals(SnapshotFileItem.java:64)
+	at com.askimed.nf.test.lang.extensions.Snapshot.match(Snapshot.java:57)
+	at com.askimed.nf.test.lang.extensions.Snapshot.match(Snapshot.java:27)
+	at com.askimed.nf.test.lang.extensions.Snapshot$match.call(Unknown Source)
+	at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCall(CallSiteArray.java:47)
+	at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:125)
+	at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:130)
+	at main_nf$_run_closure1$_closure2$_closure4.doCall(main.nf.test:38)
+	at main_nf$_run_closure1$_closure2$_closure4.doCall(main.nf.test)
+	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
+	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
+	at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107)
+	at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323)
+	at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:274)
+	at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1030)
+	at groovy.lang.Closure.call(Closure.java:427)
+	at groovy.lang.Closure.call(Closure.java:406)
+	at com.askimed.nf.test.lang.TestCode.execute(TestCode.java:16)
+	at com.askimed.nf.test.lang.workflow.WorkflowTest.execute(WorkflowTest.java:178)
+	at com.askimed.nf.test.core.TestExecutionEngine.execute(TestExecutionEngine.java:165)
+	at com.askimed.nf.test.commands.RunTestsCommand.execute(RunTestsCommand.java:299)
+	at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:43)
+	at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:18)
+	at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
+	at picocli.CommandLine.access$1300(CommandLine.java:145)
+	at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
+	at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
+	at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
+	at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
+	at picocli.CommandLine.execute(CommandLine.java:2078)
+	at com.askimed.nf.test.App.run(App.java:39)
+	at com.askimed.nf.test.App.main(App.java:46)
+Oct-10 11:12:40.979 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Testsuite 'Test Workflow PREP_WHOLE_READ' finished. snapshot file: true, skipped tests: false, failed tests: true
+Oct-10 11:12:40.979 [main] INFO  com.askimed.nf.test.core.TestExecutionEngine - Executed 1 tests. 1 tests failed. Done!
diff --git a/Documentation.md b/Documentation.md
@@ -16,6 +16,9 @@ For a detailed review of the considerations behind each analysis step you can co
 
 This pipeline requires demultiplexed fastq sample input and an associated metadata spreadsheet. We recommend using [our demultiplex pipeline](https://github.com/goodwright/flow-nf/tree/master/subworkflows/goodwright/clip_demultiplex) to produce these individual samples fastq.
 
+[ARTR-seq Add-On]
+`pairwise_samplesheet`: a comma-separated pairwise samplesheet used for MACS3 pairwise peak calling. The groups and replicates must match those in the samplesheet, and each IP sample may be listed only once (however, the same control sample can be used multiple times).
+
 ## Curated Outputs
 
 These are what we would consider to be the most commonly used outputs of the pipeline, so on the Flow platform we present these as "curated ouputs". All of the outputs are listed in the "Pipeline in Detail" section below.
@@ -35,6 +38,9 @@ K-mer enrichment motif analysis performed by PEKA. PDF file with graphs showing
 **Quality control report**
 MultiQC report of important quality control metrics. Produced by MULTIQC process.
 
+[ARTR-seq Add-On]
+**MACS3 Peak Calling**
+Peaks generated by MACS3.
 ## Commonly used parameters
 
 **Moving UMI from fastq reads to read header**
@@ -62,6 +68,13 @@ When you are working with data you're already familiar with you might have speci
 - Minimum length of reads kept by Trim Galore! after trimming, `trim_length`, eg. `10`
 - Bowtie parameters for pre-mapping, `bowtie_params`, eg. `"-v 2 -m 100 --norc --best --strata"`
 - Paraclu minimum cut off value, `paraclu_min_value`, eg. `30`
+[ARTR-seq Add-On]
+- MACS3 effective genome size, `macs_gsize`, eg. `hs` for human
+- MACS3 peak calling parameters, `macs3_params`, eg. `--keep-dup all --nomodel --extsize 30 --bdg`
+
+**ARTR-seq Switch**
+
+When set to `true`, the boolean parameter `whole_read_analysis` enables the ARTR-seq-specific pipeline: it disables the crosslink-site processing of CLIP-seq and switches to whole-read processing and mapping instead.
 
 ## Pipeline in Detail
 
@@ -147,6 +160,7 @@ This detailed description will present each <u>subworkflow</u>/_module_ run in t
 15. <u>CLIPSEQ_CLIPQC</u>
 16. _MULTIQC_
 
+
 ## Common Issues
 
 **Analysis of publicly available data**

diff --git a/README.md b/README.md
@@ -17,6 +17,14 @@ profiles `test` and the container engine you wish to use eg. `docker`. For examp
 nextflow run main.nf -profile test,docker
 ```
 
+[ARTR-seq test on NEMO; run via sbatch or in interactive mode]
+```bash
+ml Nextflow/23.04.4
+ml Singularity/3.11.3
+export NXF_SINGULARITY_CACHEDIR=/camp/lab/ulej/home/shared/singularity
+nextflow run main.nf -profile crick,test_artr_seq
+```
+
 Full dataset testing of 9 iCLIP samples can also be run using profile `test_full`.
 A test can also be run that skips all preparing of annotations/indexes using profile `test_no_prep_genome`.
 
@@ -30,6 +38,12 @@ If you require all reference files (eg. genomic indexes, filtered and segmented
 | ------- | --------- | ---------------------------------------------------------------------- | ------- |
 | TDP43_1 | 1         | s3://nf-core-awsmegatests/clipseq/input_data/fastq/ERR1530360.fastq.gz |         |
 
+- `pairwise_samplesheet` : csv file containing 4 columns: `group`, `replicate`, `control_group`, `control_replicate`. `group` is the sample name; `replicate` is currently unused by the pipeline, so filling it with `1` is acceptable; `control_group` is the control sample name; and `control_replicate` selects which control replicate to use for MACS3 pairwise peak calling.
+
+| group   | replicate | control_group | control_replicate |
+| ------- | --------- | ------------- | ----------------- |
+| TDP43_1 | 1         | input         | 1                 |
+
 - `fasta` : genome fasta file .eg './tests/data/genome/homosapien-hg37-chr21.fa.gz'
 - `smrna_fasta` : fasta file to be mapped to before the genome file, typically containing rRNA and tRNA sequences .eg'./tests/data/genome/homosapiens_smallRNA.fa.gz'
 - `gtf` : annotation file for the genome fasta .eg'./tests/data/genome/gencode.v35.chr21.gtf.gz'

diff --git a/conf/crick.config b/conf/crick.config
@@ -0,0 +1,26 @@
+// Profile metadata, container settings, executor and resource params for the Francis Crick Institute NEMO HPC cluster.
+
+params {
+    config_profile_description = 'The Francis Crick Institute NEMO HPC cluster profile provided by nf-core/configs.'
+    config_profile_contact     = 'Chris Cheshire (@chris-cheshire)'
+    config_profile_url         = 'https://www.crick.ac.uk/research/platforms-and-facilities/scientific-computing/technologies'
+}
+
+singularity {
+    enabled    = true
+    autoMounts = true
+    runOptions = '--bind /nemo --bind /flask' // extra Singularity flags: bind /nemo and /flask into each container
+    libraryDir = '/flask/apps/containers/all-singularity-images' // NOTE(review): presumably a shared store of pre-built images — confirm
+}
+
+process {
+    executor = 'slurm' // processes are submitted through the SLURM executor
+}
+
+params {
+    max_memory    = 2.TB // NOTE(review): max_* presumably cap per-process resource requests — confirm where they are consumed
+    max_cpus      = 256
+    max_time      = '168.h'
+
+    igenomes_base = '/flask/reference/Genomics/aws-igenomes' // NOTE(review): presumably a local copy of AWS iGenomes — confirm
+}
diff --git a/conf/logic.config b/conf/logic.config
@@ -17,12 +17,24 @@ params {
     run_calc_crosslinks     = true
     run_peak_calling        = true
     run_reporting           = true
+    // Additional logic
+    run_pairwise_input_check  = false
+    run_whole_read_analysis       = false
 }
 
 // Set other logic
 if(params.move_umi_to_header) { params.run_move_umi_to_header = true }
 if(params.skip_umi_dedupe)    { params.run_umi_dedup = false         }
 
+// Whole read analysis: when params.whole_read_analysis is truthy, override the run_* defaults from the params block above
+if(params.whole_read_analysis) {
+    params.run_calc_crosslinks       = false // default above is true
+    params.run_pairwise_input_check  = true  // default above is false
+    params.run_whole_read_analysis   = true  // default above is false
+    params.run_peak_calling          = false // default above is true
+    params.run_reporting             = false // default above is true
+}
+
 // Set only logic
 if(params.only_input) {
     params.run_genome_prep        = false