Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions PCGC/PCGC_code/cufflinks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#$ -cwd
bam=$1
gtf=$2
dir=$3
out="Cufflinksout"

if [ ! -d $out ]; then mkdir -p $out; fi
if [ ! -d $dir ]; then mkdir -p $dir; fi

bam_base=`basename $bam .bam`
individualOut=$bam_base"_cufflinks"
cmd="/ifs/data/c2b2/ngs_lab/ngs/usr/bin/cufflinks -o $out/$individualOut --compatible-hits-norm --GTF $gtf $bam"
echo -e "do cufflinks with ref genes: \n $cmd"
$cmd
cp ./$out/$individualOut/genes.fpkm_tracking $dir/${bam_base}.genes.fpkm_tracking

17 changes: 17 additions & 0 deletions PCGC/PCGC_code/handleStar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash -l
#$-cwd

r1=$1
r2=$2
code=/ifs/home/c2b2/ys_lab/wm2313/code/
log=starlogs

if [ ! -d $log ] ; then mkdir -p $log; fi

r1_base=`basename $r1 .gz`
r2_base=`basename $r2 .gz`

echo $r1_base

## qsub -l mem=11G,time=16:: -P shen-chung -pe smp 4 -m e -o $log/star.$r1_base.o -e $log/star.$r1_base.e -N star.$r1_base $code/star.sh $r1 $r2
qsub -l mem=6G,time=14:: -P ys_lab -pe smp 6 -o $log/star.$r1_base.o -e $log/star.$r1_base.e -N star.$r1_base $code/star.sh $r1 $r2
15 changes: 15 additions & 0 deletions PCGC/PCGC_code/run_GTEx_star.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#$ -cwd

dir=$1 ## the dir of fastq files
## /ifs/home/c2b2/ys_lab/wm2313/gtex/Heart_reads/fastq/
for r1 in $dir/*_1.fastq.gz; do
r1_base=`basename $r1 _1.fastq.gz`
r2=$dir/${r1_base}_2.fastq.gz

if [[ -e $r2 ]]; then
echo $r1
echo $r2
sh handleStar.sh $r1 $r2;
fi
done
82 changes: 82 additions & 0 deletions PCGC/PCGC_code/star.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash -l
#$ -cwd

R1=$1
R2=$2

## check the length of reads to choose star configuration
read=`zcat -d $R1| head | tail -n +10`
readLength=`echo ${#read}`
echo "fastq read length: "$readLength

out="StarBam"
if [ ! -d $out ] ; then mkdir -p $out; fi

R1_base=`basename $R1`
cd $out

if [ -d ${R1_base} ] ; then rm -r ${R1_base}; fi

mkdir -p ${R1_base}
cd ${R1_base}

echo "star align begin..."
date

if [[ $readLength -gt 60 ]];
then
genomeDB="~/scratch/softwareMakeup/genome100"
seedStart=50
else
genomeDB="/ifs/home/c2b2/ys_lab/wm2313/data/softwares/STAR_2.3.0e/genome"
seedStart=30
fi

/ifs/home/c2b2/ys_lab/wm2313/data/softwares/STAR_2.3.0e/STAR --genomeDir $genomeDB --sjdbGTFfile /ifs/home/c2b2/ys_lab/wm2313/data/GSNAP/chr_genes.gtf --readFilesCommand zcat --seedSearchStartLmax $seedStart --runThreadN 4 --outSAMunmapped Within --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --readFilesIn $R1 $R2 --outStd SAM --outSAMmode Full | ~/data/softwares/samtools-0.1.19/samtools view -bS -> ${R1_base}.bam

## --outSAMstrandField intronMotif is used to add XS strand attribute for alignments that contain splice junctions.
## --outFilterIntronMotifs RemoveNoncanonical filter out alignments that contain non-canonical junctions

echo "star alignment finished: "
date

bam=${R1_base}.bam
sortbam=${R1_base}.hg19UCSC.star.sort.bam
~/data/softwares/samtools-0.1.19/samtools sort $bam ${R1_base}.hg19UCSC.star.sort
echo "samtools sort finished: "
date

if [[ -e $sortbam ]];
then
rm -f $bam
fi

~/data/softwares/samtools-0.1.19/samtools index $sortbam
echo "samtools index finished: "
date

if [[ -e ${R1_base}".hg19UCSC.star.sort.bam.bai" ]];

then
echo ${R1_base} done! >> ../starbam.checklist

else
echo echo ${R1_base} failed! >> ../starbam.checklist
fi



logs="analysis_logs"
if [ ! -d $logs ] ; then mkdir -p $logs; fi
currentDir=`pwd`
parentDir=`dirname $currentDir`
## gtf=/ifs/home/c2b2/ys_lab/wm2313/data/GSNAP/chr_genes.gtf
gtf=/ifs/home/c2b2/ys_lab/wm2313/data/GSNAP/gencode.v19.annotation.gtf

## peform alignment QC
qsub -l mem=5G,time=5:: -pe smp 6 -o $logs/${R1_base}.RNAseqc.o -e $logs/${R1_base}.RNAseqc.e -N RNASEQC.${R1_base} ~/code/star_RNA-SeQC_pipe.sh $sortbam $gtf

## compute FPKM with cufflinks
mkdir $parentDir/fpkms ## where to put the fpkm files(gene level)
qsub -l mem=1G,time=16:: -pe smp 6 -m e -o $logs/${R1_base}.cufflink.o -e $logs/${R1_base}.cufflink.e -N cufflinks.${R1_base} ~/code/cufflinks.sh $sortbam $gtf $parentDir/fpkms

61 changes: 61 additions & 0 deletions PCGC/PCGC_code/star_RNA-SeQC_pipe.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash -l
#$ -cwd

bam=$1
gtf=$2

gsnap_dir=/ifs/home/c2b2/ys_lab/wm2313/data/GSNAP
reference=$gsnap_dir/hg19.fa
bam_base=`basename $bam .bam`
sample=${bam_base}
reheaderbam=${bam_base}".reheader.bam"

if [[ ! -e $reheaderbam.bai ]]; then

echo reheader the bam files for GATK...
~/data/softwares/jdk1.7.0_60/bin/java -jar -Xmx3g /ifs/home/c2b2/ys_lab/wm2313/data/softwares/picard/AddOrReplaceReadGroups.jar I=$bam O=$reheaderbam LB=XX PL=XX PU=XX SM=XX
echo index the new bam file..
~/data/softwares/samtools-0.1.19/samtools index $reheaderbam

fi


echo perform RNA-SeQC..
~/data/softwares/jdk1.7.0_60/bin/java -jar -Xmx10g /ifs/home/c2b2/ys_lab/wm2313/data/softwares/RNA-SeQC_v1.1.7.jar -o ./"RNA-SeQC" -gatkFlags "-DBQ 1" -r $reference -t $gtf -s "$sample|$reheaderbam|NA"



## get a summary of the mapping quality
cd RNA-SeQC

summary=../../rnaseqc.info
if [[ ! -e $summary ]]; then
echo -e "sample\tReadsNO\tmappingRate\tintraRate\texonicRate\tintronicRate\tintergenicRate\treadsLength" > $summary
fi

ID=`echo ${bam_base} | cut -d '.' -f1-3`

if [[ -e metrics.tsv ]]; then

data=`tail -n +2 metrics.tsv`
intraRate=`echo $data | cut -d " " -f5,5`
exonicRate=`echo $data | cut -d " " -f7,7`

mappingRate=`echo $data | cut -d " " -f8,8`

readsLength=`echo $data | cut -d " " -f12,12`
mappedNO=`echo $data | cut -d " " -f17,17`

intergenicRate=`echo $data | cut -d " " -f18,18`
intronicRate=`echo $data | cut -d " " -f28,28`
readsNO=`echo "$mappedNO/$mappingRate" | bc`

echo -e "$ID\t$readsNO\t$mappingRate\t$intraRate\t$exonicRate\t$intronicRate\t$intergenicRate\t$readsLength" >> $summary

cd ..
rm -f $reheaderbam
rm -f $reheaderbam.bai

else
echo -e "$ID\tfails on RNA-SeQC" >> $summary
fi