Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,14 @@ When all set, run
```
snakemake -j 32
```

## Running reference flow with bwa-mem2

Instead of bowtie2, reference flow can use bwa-mem2 for aligning reads. Users must set:

```
ALIGNER = 'bwa-mem2'
```
in the config file.

Reference flow does not support supplentary read alignments when coupled with bwa-mem2. Pre-build indices for bwa-mem2 are not available to download but users can build them by setting the `USE_PREBUILT` option to `False`, which runs the complete reference flow pipeline.
2 changes: 2 additions & 0 deletions snakemake/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ This is different from the number specified by `snakemake -j <t>`, which is the

- `SORT_SAM` : whether to sort the final SAM output

- `ALIGNER` : Alignment method. The workflow supports bowtie2 or bwa-mem2.

- `ALN_MAPQ_THRSD` : mapping quality cutoff to split read into committed and deferred groups


Expand Down
11 changes: 9 additions & 2 deletions snakemake/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ CHR_PREFIX = config['CHR_PREFIX']
LENGTH_MAP = config['LENGTH_MAP']
CHROM_MAP = config['CHROM_MAP']

ALIGNER = config['ALIGNER']
assert ALIGNER in ['bowtie2', 'bwa-mem2']

FAMILY = config['FAMILY']
SPOP = config['SPOP']
BCFTOOLS = config['BCFTOOLS']
Expand All @@ -47,8 +50,12 @@ THREADS = config['THREADS']
RAND_SEED = config['RAND_SEED']
''''''

# Bowtie 2 index extensions
IDX_ITEMS = ['1', '2', '3', '4', 'rev.1', 'rev.2']
if ALIGNER == 'bowtie2':
# Bowtie 2 index extensions
IDX_ITEMS = ['1', '2', '3', '4', 'rev.1', 'rev.2']
else:
# bwa-mem2 index extensions
IDX_ITEMS = ['amb', 'ann', 'bwt.2bit.64', '0123', 'pac']

# Prefixes and directory paths for major-allele reference contruction and indexing
PREFIX_MAJOR_F = os.path.join(DIR, 'major/{CHROM}_filtered_major')
Expand Down
5 changes: 4 additions & 1 deletion snakemake/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ USE_PREBUILT : True
# Whether to sort the output SAM
SORT_SAM : False

# Alignment method; either 'bowtie2' or 'bwa-mem2'
ALIGNER : 'bowtie2'

# Reference genome; usually a standard GRC genome
GENOME : '../resources/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna'

# Chromosomes included
CHROM : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y']
CHROM : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X']
# Prefix of a chromosome. Usually set to 'chr' (GRCh38) or '' (hg19)
CHR_PREFIX : 'chr'

Expand Down
95 changes: 62 additions & 33 deletions snakemake/shared/alignment_paired_end.Snakefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,35 @@
''' This snakefile includes rules that perform paired-end alignment.'''
''' Perform first-pass alignment.'''
rule align_to_major:
input:
reads1 = READS1,
reads2 = READS2,
idx = expand(
os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'),
idx = IDX_ITEMS)
params:
index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bowtie2 --threads {THREADS} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam}'
if ALIGNER == 'bowtie2':
rule align_to_major:
input:
reads1 = READS1,
reads2 = READS2,
idx = expand(
os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'),
idx = IDX_ITEMS)
params:
index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bowtie2 --threads {THREADS} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam}'
else: # 'bwa-mem2'
rule align_to_major:
input:
reads1 = READS1,
reads2 = READS2,
idx = expand(
os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'),
idx = IDX_ITEMS)
params:
reference = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} {input.reads2} | samtools view -h -F 2048 -o {output.sam} -'

''' Split first-pass alignment into high-quality and low-quality files. '''
rule refflow_split_aln_by_mapq:
Expand Down Expand Up @@ -41,25 +57,38 @@ rule refflow_split_aln_by_mapq:
-t {ALN_MAPQ_THRSD} --paired-end --split-strategy {params.split_strategy}'

''' Align low-quality reads using population genomes.'''
rule refflow_align_secondpass_paired_end:
input:
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
reads2 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'),
idx1 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'),
idx2 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'),
idx3 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'),
idx4 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'),
idx5 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'),
idx6 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2')
params:
index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bowtie2 --reorder --threads {threads} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam};'
if ALIGNER == 'bowtie2':
rule refflow_align_secondpass_paired_end:
input:
[os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind)
for ind in IDX_ITEMS],
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
reads2 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'),
params:
index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bowtie2 --reorder --threads {threads} -x {params.index} -1 {input.reads1} -2 {input.reads2} -S {output.sam};'
else: # bwa-mem2
rule refflow_align_secondpass_paired_end:
input:
[os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind)
for ind in IDX_ITEMS],
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
reads2 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_2.fq'),
params:
reference = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa')
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} {input.reads2} | samtools view -h -F 2048 | samtools sort -n -o {output.sam}'

rule refflow_merge_secondpass:
input:
Expand Down
86 changes: 56 additions & 30 deletions snakemake/shared/alignment_single_end.Snakefile
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
''' This snakefile includes rules that perform single-end alignment.'''
''' Perform first-pass alignment.'''
rule align_to_major:
input:
reads1 = READS1,
idx = expand(
os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'),
idx = IDX_ITEMS)
params:
index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bowtie2 --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}'
if ALIGNER == 'bowtie2':
rule align_to_major:
input:
reads1 = READS1,
idx = expand(
os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj.{idx}.bt2'),
idx = IDX_ITEMS)
params:
index = os.path.join(DIR, 'major/indexes/' + EXP_LABEL + '-maj')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bowtie2 --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}'
else: # 'bwa-mem2'
rule align_to_major:
input:
reads1 = READS1,
idx = expand(
os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa.{idx}'),
idx = IDX_ITEMS)
params:
reference = os.path.join(DIR, 'major/' + EXP_LABEL + '-maj.fa')
output:
sam = os.path.join(DIR_FIRST_PASS, EXP_LABEL + '-major.sam')
threads: THREADS
shell:
'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} | samtools view -h -F 2048 -o {output.sam} -'

''' Split first-pass alignment into high-quality and low-quality files. '''
rule refflow_split_aln_by_mapq:
Expand All @@ -35,23 +50,34 @@ rule refflow_split_aln_by_mapq:
'samtools fastq {output.lowq} > {output.lowq_reads}'

''' Align low-quality reads using population genomes.'''
rule refflow_align_secondpass_single_end:
input:
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
idx1 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'),
idx2 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'),
idx3 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'),
idx4 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'),
idx5 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'),
idx6 = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2')
params:
index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bowtie2 --reorder --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}'
if ALIGNER == 'bowtie2':
rule refflow_align_secondpass_single_end:
input:
[os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind)
for ind in IDX_ITEMS],
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
params:
index = os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bowtie2 --reorder --threads {THREADS} -x {params.index} -U {input.reads1} -S {output.sam}'
else: # bwa-mem2
rule refflow_align_secondpass_single_end:
input:
[os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind)
for ind in IDX_ITEMS],
reads1 = os.path.join(DIR_FIRST_PASS,
EXP_LABEL + '-major-mapqlt' + ALN_MAPQ_THRSD + '_1.fq'),
params:
reference = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa')
output:
sam = PREFIX_SECOND_PASS + '.sam'
threads: THREADS
shell:
'bwa-mem2 mem -T 0 -t {threads} {params.reference} {input.reads1} | samtools view -h -F 2048 | samtools sort -n -o {output.sam}'

''' Merge refflow results. '''
rule refflow_merge_secondpass:
Expand Down
66 changes: 41 additions & 25 deletions snakemake/shared/prepare_pop_genome.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,31 +101,47 @@ rule build_pop_genome:
--out-prefix {params.prefix} \
--include-indels')

rule build_pop_genome_index:
input:
genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa')
output:
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.1.bt2'),
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.2.bt2'),
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.3.bt2'),
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.4.bt2'),
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.1.bt2'),
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.rev.2.bt2')
params:
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
threads: THREADS
shell:
'bowtie2-build --threads {threads} {input.genome} {params};'

rule check_pop_genome:
input:
expand(
DIR_POP_GENOME_BLOCK_IDX + WG_POP_GENOME_SUFFIX + '.{IDX_ITEMS}.bt2',
GROUP=GROUP, IDX_ITEMS=IDX_ITEMS, POP_LEVEL=POP_LEVEL
)
output:
touch(temp(os.path.join(DIR, 'prepare_pop_genome.done')))

if ALIGNER == 'bowtie2':
rule build_pop_genome_index:
input:
genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa')
output:
[os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX + '.%s.bt2' % ind)
for ind in IDX_ITEMS]
params:
os.path.join(DIR_POP_GENOME_BLOCK_IDX, WG_POP_GENOME_SUFFIX)
threads: THREADS
shell:
'bowtie2-build --threads {threads} {input.genome} {params};'
else: # 'bwa-mem2'
rule build_pop_genome_index:
input:
genome = os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa')
output:
[os.path.join(DIR_POP_GENOME_BLOCK, WG_POP_GENOME_SUFFIX + '.fa.%s' % ind)
for ind in IDX_ITEMS]
threads: 1
shell:
'bwa-mem2 index {input}'
if ALIGNER == 'bowtie2':
rule check_pop_genome:
input:
expand(
DIR_POP_GENOME_BLOCK_IDX + WG_POP_GENOME_SUFFIX + '.{IDX_ITEMS}.bt2',
GROUP=GROUP, IDX_ITEMS=IDX_ITEMS, POP_LEVEL=POP_LEVEL
)
output:
touch(temp(os.path.join(DIR, 'prepare_pop_genome.done')))
else: # 'bwa-mem2'
rule check_pop_genome:
input:
expand(
DIR_POP_GENOME_BLOCK + WG_POP_GENOME_SUFFIX + '.fa.{IDX_ITEMS}',
GROUP=GROUP, IDX_ITEMS=IDX_ITEMS,
POP_LEVEL=POP_LEVEL
)
output:
touch(temp(os.path.join(DIR, 'prepare_pop_genome.done')))

'''
Rules for building indexes for liftover.
Expand Down
Loading