From 20361d08d4cbe62f65185b799ffe174fcb9dd9bd Mon Sep 17 00:00:00 2001 From: Metin Balaban Date: Sun, 26 May 2024 17:27:18 -0500 Subject: [PATCH 1/4] bwa-mem2 support --- snakemake/Snakefile | 11 ++- snakemake/config.yaml | 5 +- .../shared/alignment_paired_end.Snakefile | 95 ++++++++++++------- .../shared/alignment_single_end.Snakefile | 86 +++++++++++------ snakemake/shared/prepare_pop_genome.Snakefile | 66 ++++++++----- .../shared/prepare_standard_genome.Snakefile | 68 ++++++++----- 6 files changed, 217 insertions(+), 114 deletions(-) diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 7371db5..0df00a3 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -35,6 +35,9 @@ CHR_PREFIX = config['CHR_PREFIX'] LENGTH_MAP = config['LENGTH_MAP'] CHROM_MAP = config['CHROM_MAP'] +ALIGNER = config['ALIGNER'] +assert ALIGNER in ['bowtie2', 'bwa-mem2'] + FAMILY = config['FAMILY'] SPOP = config['SPOP'] BCFTOOLS = config['BCFTOOLS'] @@ -47,8 +50,12 @@ THREADS = config['THREADS'] RAND_SEED = config['RAND_SEED'] '''''' -# Bowtie 2 index extensions -IDX_ITEMS = ['1', '2', '3', '4', 'rev.1', 'rev.2'] +if ALIGNER == 'bowtie2': + # Bowtie 2 index extensions + IDX_ITEMS = ['1', '2', '3', '4', 'rev.1', 'rev.2'] +else: + # bwa-mem2 index extensions + IDX_ITEMS = ['amb', 'ann', 'bwt.2bit.64', '0123', 'pac'] # Prefixes and directory paths for major-allele reference contruction and indexing PREFIX_MAJOR_F = os.path.join(DIR, 'major/{CHROM}_filtered_major') diff --git a/snakemake/config.yaml b/snakemake/config.yaml index 43b2646..5b8301a 100644 --- a/snakemake/config.yaml +++ b/snakemake/config.yaml @@ -26,8 +26,11 @@ SORT_SAM : False # Reference genome; usually a standard GRC genome GENOME : '../resources/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna' +# alignment method; either 'bowtie2' or 'bwa-mem2' +ALIGNER : 'bowtie2' + # Chromosomes included -CHROM : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y'] +CHROM : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X'] # Prefix of a chromosome. Usually set to 'chr' (GRCh38) or '' (hg19) CHR_PREFIX : 'chr' diff --git a/snakemake/shared/alignment_paired_end.Snakefile b/snakemake/shared/alignment_paired_end.Snakefile index 9e56aea..4d51f83 100644 --- a/snakemake/shared/alignment_paired_end.Snakefile +++ b/snakemake/shared/alignment_paired_end.Snakefile @@ -1,19 +1,35 @@ ''' This snakefile includes rules that perform paired-end alignment.''' ''' Perform first-pass alignment.''' -rule align_to_major: - input: - reads1 = READS1, - reads2 = READS2, - idx = expand( - os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), - idx = IDX_ITEMS) - params: - index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') - output: - sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') - threads: THREADS - shell: - 'bowtie2 --threads {THREADS} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam}' +if ALIGNER == 'bowtie2': + rule align_to_major: + input: + reads1 = READS1, + reads2 = READS2, + idx = expand( + os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), + idx = IDX_ITEMS) + params: + index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') + output: + sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') + threads: THREADS + shell: + 'bowtie2 --threads {THREADS} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam}' +else: # 'bwa-mem2' + rule align_to_major: + input: + reads1 = READS1, + reads2 = READS2, + idx = expand( + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'), + idx = IDX_ITEMS) + params: + reference = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa') + output: + sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') + threads: THREADS + shell: + 'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} {input.reads2} | samtools view -h -F 2048 -o {output.sam} -' ''' Split first-pass alignment into high-quality and low-quality files. ''' rule refflow_split_aln_by_mapq: @@ -41,25 +57,38 @@ rule refflow_split_aln_by_mapq: -t {ALN_MAPQ_THRSD} --paired-end --split-strategy {params.split_strategy}' ''' Align low-quality reads using population genomes.''' -rule refflow_align_secondpass_paired_end: - input: - reads1 = os.path.join(DIR_FIRST_PASS, - EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), - reads2 = os.path.join(DIR_FIRST_PASS, - EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'), - idx1 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'), - idx2 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'), - idx3 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'), - idx4 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'), - idx5 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'), - idx6 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2') - params: - index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) - output: - sam = PREFIX_SECOND_PASS + '.sam' - threads: THREADS - shell: - 'bowtie2 --reorder --threads {threads} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam};' +if ALIGNER == 'bowtie2': + rule refflow_align_secondpass_paired_end: + input: + [os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind) + for ind in IDX_ITEMS], + reads1 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), + reads2 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'), + params: + index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) + output: + sam = PREFIX_SECOND_PASS + '.sam' + threads: THREADS + shell: + 'bowtie2 --reorder --threads {threads} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam};' +else: # bwa-mem2 + rule refflow_align_secondpass_paired_end: + input: + [os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind) + for ind in IDX_ITEMS], + reads1 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), + reads2 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'), + params: + reference = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa') + output: + sam = PREFIX_SECOND_PASS + '.sam' + threads: THREADS + shell: + 'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} {input.reads2} | samtools view -h -F 2048 | samtools sort -n -o {output.sam}' rule refflow_merge_secondpass: input: diff --git a/snakemake/shared/alignment_single_end.Snakefile b/snakemake/shared/alignment_single_end.Snakefile index c1ef051..934474b 100644 --- a/snakemake/shared/alignment_single_end.Snakefile +++ b/snakemake/shared/alignment_single_end.Snakefile @@ -1,18 +1,33 @@ ''' This snakefile includes rules that perform single-end alignment.''' ''' Perform first-pass alignment.''' -rule align_to_major: - input: - reads1 = READS1, - idx = expand( - os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), - idx = IDX_ITEMS) - params: - index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') - output: - sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') - threads: THREADS - shell: - 'bowtie2 --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}' +if ALIGNER == 'bowtie2': + rule align_to_major: + input: + reads1 = READS1, + idx = expand( + os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), + idx = IDX_ITEMS) + params: + index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') + output: + sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') + threads: THREADS + shell: + 'bowtie2 --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}' +else: # 'bwa-mem2' + rule align_to_major: + input: + reads1 = READS1, + idx = expand( + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'), + idx = IDX_ITEMS) + params: + reference = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa') + output: + sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam') + threads: THREADS + shell: + 'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} | samtools view -h -F 2048 -o {output.sam} -' ''' Split first-pass alignment into high-quality and low-quality files. ''' rule refflow_split_aln_by_mapq: @@ -35,23 +50,34 @@ rule refflow_split_aln_by_mapq: 'samtools fastq {output.lowq} > {output.lowq_reads}' ''' Align low-quality reads using population genomes.''' -rule refflow_align_secondpass_single_end: - input: - reads1 = os.path.join(DIR_FIRST_PASS, - EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), - idx1 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'), - idx2 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'), - idx3 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'), - idx4 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'), - idx5 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'), - idx6 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2') - params: - index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) - output: - sam = PREFIX_SECOND_PASS + '.sam' - threads: THREADS - shell: - 'bowtie2 --reorder --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}' +if ALIGNER == 'bowtie2': + rule refflow_align_secondpass_single_end: + input: + [os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind) + for ind in IDX_ITEMS], + reads1 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), + params: + index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) + output: + sam = PREFIX_SECOND_PASS + '.sam' + threads: THREADS + shell: + 'bowtie2 --reorder --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}' +else: # bwa-mem2 + rule refflow_align_secondpass_single_end: + input: + [os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind) + for ind in IDX_ITEMS], + reads1 = os.path.join(DIR_FIRST_PASS, + EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'), + params: + reference = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa') + output: + sam = PREFIX_SECOND_PASS + '.sam' + threads: THREADS + shell: + 'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} | samtools view -h -F 2048 | samtools sort -n -o {output.sam}' ''' Merge refflow results. ''' rule refflow_merge_secondpass: diff --git a/snakemake/shared/prepare_pop_genome.Snakefile b/snakemake/shared/prepare_pop_genome.Snakefile index 5838741..06ad214 100644 --- a/snakemake/shared/prepare_pop_genome.Snakefile +++ b/snakemake/shared/prepare_pop_genome.Snakefile @@ -101,31 +101,47 @@ rule build_pop_genome: --out-prefix {params.prefix} \ --include-indels') -rule build_pop_genome_index: - input: - genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa') - output: - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'), - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'), - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'), - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'), - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'), - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2') - params: - os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) - threads: THREADS - shell: - 'bowtie2-build --threads {threads} {input.genome} {params};' - -rule check_pop_genome: - input: - expand( - DIR_POP_GENOME_BLOCK_IDX + WG_POP_GENOME_SUFFIX + '.{IDX_ITEMS}.bt2', - GROUP=GROUP, IDX_ITEMS=IDX_ITEMS, POP_LEVEL=POP_LEVEL - ) - output: - touch(temp(os.path.join(DIR, 'prepare_pop_genome.done'))) - +if ALIGNER == 'bowtie2': + rule build_pop_genome_index: + input: + genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa') + output: + [os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind) + for ind in IDX_ITEMS] + params: + os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX) + threads: THREADS + shell: + 'bowtie2-build --threads {threads} {input.genome} {params};' +else: # 'bwa-mem2' + rule build_pop_genome_index: + input: + genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa') + output: + [os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind) + for ind in IDX_ITEMS] + threads: 1 + shell: + 'bwa-mem2 index {input}' +if ALIGNER == 'bowtie2': + rule check_pop_genome: + input: + expand( + DIR_POP_GENOME_BLOCK_IDX + WG_POP_GENOME_SUFFIX + '.{IDX_ITEMS}.bt2', + GROUP=GROUP, IDX_ITEMS=IDX_ITEMS, POP_LEVEL=POP_LEVEL + ) + output: + touch(temp(os.path.join(DIR, 'prepare_pop_genome.done'))) +else: # 'bwa-mem2' + rule check_pop_genome: + input: + expand( + DIR_POP_GENOME_BLOCK + WG_POP_GENOME_SUFFIX + '.fa.{IDX_ITEMS}', + GROUP=GROUP, IDX_ITEMS=IDX_ITEMS, + POP_LEVEL=POP_LEVEL + ) + output: + touch(temp(os.path.join(DIR, 'prepare_pop_genome.done'))) ''' Rules for building indexes for liftover. diff --git a/snakemake/shared/prepare_standard_genome.Snakefile b/snakemake/shared/prepare_standard_genome.Snakefile index f63fc8e..ca66d4b 100644 --- a/snakemake/shared/prepare_standard_genome.Snakefile +++ b/snakemake/shared/prepare_standard_genome.Snakefile @@ -8,28 +8,50 @@ rule build_major: vcfgz_idx = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.vcf.gz.csi'), out_genome = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa'), shell: - '{BCFTOOLS} view -O z -q 0.5000001 -G -o {output.vcf} -v snps,indels -m2 -M2 {input.vcf};' - 'bgzip -c {output.vcf} > {output.vcfgz};' - '{BCFTOOLS} index {output.vcfgz};' + 'mkdir -p `dirname {input.vcf}`/major; ' + '{BCFTOOLS} view -O v -q 0.5000001 -G -o {output.vcf} -v snps,indels -m2 -M2 {input.vcf}; ' + 'bgzip -c {output.vcf} > {output.vcfgz}; ' + '{BCFTOOLS} index {output.vcfgz}; ' '{BCFTOOLS} consensus -f {input.genome} -o {output.out_genome} {output.vcfgz}' -rule build_major_index: - input: - os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa') - output: - expand( - os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), - idx = IDX_ITEMS) - params: - os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') - threads: THREADS - shell: - 'bowtie2-build --threads {threads} {input} {params}' - -rule check_standard_genomes: - input: - expand( - os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), - idx = IDX_ITEMS), - output: - touch(temp(os.path.join(DIR, 'prepare_standard_genome.done'))) +if ALIGNER == 'bowtie2': + rule build_major_index: + input: + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa') + output: + expand( + os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), + idx = IDX_ITEMS) + params: + os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj') + threads: THREADS + shell: + 'bowtie2-build --threads {threads} {input} {params}' +else: # 'bwa-mem2' + rule build_major_index: + input: + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa') + output: + expand( + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'), + idx = IDX_ITEMS) + threads: 1 + shell: + 'bwa-mem2 index {input}' + +if ALIGNER == 'bowtie2': + rule check_standard_genomes: + input: + expand( + os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'), + idx = IDX_ITEMS), + output: + touch(temp(os.path.join(DIR, 'prepare_standard_genome.done'))) +else: # 'bwa-mem2' + rule check_standard_genomes: + input: + expand( + os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'), + idx = IDX_ITEMS) + output: + touch(temp(os.path.join(DIR, 'prepare_standard_genome.done'))) From 8863683c9707211fe8c3b4985584447b1d432caa Mon Sep 17 00:00:00 2001 From: Metin Balaban Date: Sun, 26 May 2024 17:39:32 -0500 Subject: [PATCH 2/4] download tbis for 1kg --- src/download_1kg_vcf.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/download_1kg_vcf.sh b/src/download_1kg_vcf.sh index 227ab64..1924114 100644 --- a/src/download_1kg_vcf.sh +++ b/src/download_1kg_vcf.sh @@ -1,4 +1,6 @@ # wget -P resources/1kg_vcf/ -r -l1 --no-parent -A "vcf.gz" http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ wget -P resources/1kg_vcf/ http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr{1..22}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz -wget -P resources/1kg_vcf/ http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr{X}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz +wget -P resources/1kg_vcf/ http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr{1..22}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz.tbi +wget -P resources/1kg_vcf/ http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz +wget -P resources/1kg_vcf/ http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chrX.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz.tbi # http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr*.vcf.gz From 8499e44b52bc66d7311085493686ba8380448a5c Mon Sep 17 00:00:00 2001 From: Metin Balaban Date: Mon, 27 May 2024 08:09:18 -0500 Subject: [PATCH 3/4] updated documentation --- README.md | 11 +++++++++++ snakemake/README.md | 2 ++ snakemake/config.yaml | 6 +++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 92f02cc..1e8c13e 100644 --- a/README.md +++ b/README.md @@ -97,3 +97,14 @@ When all set, run ``` snakemake -j 32 ``` + +## Running reference flow with bwa-mem2 + +Instead of bowtie2, reference flow can use bwa-mem2 for aligning reads. Users must set: + +``` +ALIGNER = 'bowtie2' +``` +in the config file. + + Reference flow does not support supplentary read alignments when coupled with bwa-mem2. Pre-build indices for bwa-mem2 are not available to download but users can build them by setting the `USE_PREBUILT` option to `False`, which runs the complete reference flow pipeline. diff --git a/snakemake/README.md b/snakemake/README.md index 4b12cfc..23d732f 100644 --- a/snakemake/README.md +++ b/snakemake/README.md @@ -19,6 +19,8 @@ This is different from the number specified by `snakemake -j `, which is the - `SORT_SAM` : whether to sort the final SAM output +- `ALIGNER` : Alignment method. The workflow supports bowtie2 or bwa-mem2. + - `ALN_MAPQ_THRSD` : mapping quality cutoff to split read into committed and deferred groups diff --git a/snakemake/config.yaml b/snakemake/config.yaml index 5b8301a..1bc3573 100644 --- a/snakemake/config.yaml +++ b/snakemake/config.yaml @@ -23,12 +23,12 @@ USE_PREBUILT : True # Whether to sort the output SAM SORT_SAM : False +# Alignment method; either 'bowtie2' or 'bwa-mem2' +ALIGNER : 'bowtie2' + # Reference genome; usually a standard GRC genome GENOME : '../resources/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna' -# alignment method; either 'bowtie2' or 'bwa-mem2' -ALIGNER : 'bowtie2' - # Chromosomes included CHROM : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X'] # Prefix of a chromosome. Usually set to 'chr' (GRCh38) or '' (hg19) From 81a13d7ea82881078d1a1afc2b1756bfdb798053 Mon Sep 17 00:00:00 2001 From: balabanmetin Date: Mon, 27 May 2024 09:10:34 -0400 Subject: [PATCH 4/4] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e8c13e..e6320aa 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ snakemake -j 32 Instead of bowtie2, reference flow can use bwa-mem2 for aligning reads. Users must set: ``` -ALIGNER = 'bowtie2' +ALIGNER = 'bwa-mem2' ``` in the config file.