diff --git a/simulationScripts/commands.sh b/simulationScripts/commands.sh index 45937c3..3b91738 100644 --- a/simulationScripts/commands.sh +++ b/simulationScripts/commands.sh @@ -226,12 +226,25 @@ cd .. # # Run Sailfish # mkdir sailfish # cd sailfish # -sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A1/out1.fq -2 ../A1/out2.fq -o A1 # -sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A2/out1.fq -2 ../A2/out2.fq -o A2 # -sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A3/out1.fq -2 ../A3/out2.fq -o A3 # -sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A4/out1.fq -2 ../A4/out2.fq -o A4 # -sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A5/out1.fq -2 ../A5/out2.fq -o A5 # +sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A1/out1.fq -2 ../A1/out2.fq -o A1 # +sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A2/out1.fq -2 ../A2/out2.fq -o A2 # +sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A3/out1.fq -2 ../A3/out2.fq -o A3 # +sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A4/out1.fq -2 ../A4/out2.fq -o A4 # +sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A5/out1.fq -2 ../A5/out2.fq -o A5 # # Process output # +# Fix the target names +echo "fixing output names" +python ../../processSailfishNames.py --input A1/quant.sf --output A1/quant.fixed.sf +python ../../processSailfishNames.py --input A2/quant.sf --output A2/quant.fixed.sf +python ../../processSailfishNames.py --input A3/quant.sf --output A3/quant.fixed.sf +python ../../processSailfishNames.py --input A4/quant.sf --output A4/quant.fixed.sf +python ../../processSailfishNames.py --input A5/quant.sf --output A5/quant.fixed.sf +mv A1/quant.fixed.sf A1/quant.sf +mv A2/quant.fixed.sf A2/quant.sf +mv A3/quant.fixed.sf A3/quant.sf +mv A4/quant.fixed.sf A4/quant.sf +mv A5/quant.fixed.sf A5/quant.sf +echo "done" R CMD BATCH ../../sailfishNames.R # R CMD BATCH ../../withinGeneSailfish.R # cd .. # diff --git a/simulationScripts/generate-known-fa-files.sh b/simulationScripts/generate-known-fa-files.sh index 869331f..2966d5e 100644 --- a/simulationScripts/generate-known-fa-files.sh +++ b/simulationScripts/generate-known-fa-files.sh @@ -11,7 +11,7 @@ rm -r A1.tophat.out cd transcriptome_data rsem-prepare-reference --bowtie2 known.fa ref #this is for rsem sed -e 's/\,.*//' known.fa > output.fasta #this is for making tigar names work -sailfish index -t known.fa -o indDir -k 20 -p 4 #this is for sailfish +sailfish index -t known.fa -o indDir -p 4 #this is for sailfish diff --git a/simulationScripts/processSailfishNames.py b/simulationScripts/processSailfishNames.py new file mode 100644 index 0000000..b4ba421 --- /dev/null +++ b/simulationScripts/processSailfishNames.py @@ -0,0 +1,33 @@ +from __future__ import print_function +import argparse +import os +import sys +import re + +def main(args): + ifile = args.input + if (not os.path.isfile(ifile)): + print('Could not find input file {}'.format(ifile), file=sys.stderr) + sys.exit(1) + + lines = open(ifile).readlines() + headers = [l for l in lines if l.startswith('#')] + data = [l for l in lines if not l.startswith('#')] + + with open(args.output, 'w') as ofile: + for l in headers: + ofile.write(l) + for l in data: + toks = l.rstrip('\n').split('\t') + name = re.split('\s+', toks[0])[0] + newline = '\t'.join([name] + toks[1:]) + ofile.write("{}\n".format(newline)) + + print("successfully modified file", file=sys.stderr) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process crazy transcript names') + parser.add_argument('--input', type=str, required=True, help='the quant.sf file') + parser.add_argument('--output', type=str, required=True, help='where the processed file should be placed') + args = parser.parse_args() + main(args) diff --git a/simulationScripts/sailfishNames.R b/simulationScripts/sailfishNames.R index fb9005c..1bbde26 100644 --- a/simulationScripts/sailfishNames.R +++ b/simulationScripts/sailfishNames.R @@ -9,7 +9,7 @@ while (length(oneLine <- readLines(myFile, n = 1, warn = FALSE)) > 0){ } close(myFile) par(mfrow = c(2,3)) -A1 <- read.table("A1/quant_bias_corrected.sf") +A1 <- read.table("A1/quant.sf", comment.char="#") ind <- A1[,2] groundTruth <- read.table("../A1/transcriptNames.txt") sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1])) @@ -17,7 +17,7 @@ colnames(sailA1) <- c("id","true","est") for (i in 1:dim(groundTruth)[1]){ j <- which(sailNames[,2] == as.character(sailA1[i,1])) k <- which(A1[,1] == sailNames[j,1]) - sailA1[i,3] <- A1[k,7] + sailA1[i,3] <- A1[k,4] if(i%%1000 == 0){print(i)} } sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2])) @@ -25,7 +25,7 @@ sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3])) #smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2) write.table(sailA1,file = "sailA1.txt") -A1 <- read.table("A2/quant_bias_corrected.sf") +A1 <- read.table("A2/quant.sf", comment.char="#") ind <- A1[,2] groundTruth <- read.table("../A2/transcriptNames.txt") sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1])) @@ -33,7 +33,7 @@ colnames(sailA1) <- c("id","true","est") for (i in 1:dim(groundTruth)[1]){ j <- which(sailNames[,2] == as.character(sailA1[i,1])) k <- which(A1[,1] == sailNames[j,1]) - sailA1[i,3] <- A1[k,7] + sailA1[i,3] <- A1[k,4] if(i%%1000 == 0){print(i)} } sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2])) @@ -41,7 +41,7 @@ sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3])) #smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2) write.table(sailA1,file = "sailA2.txt") -A1 <- read.table("A3/quant_bias_corrected.sf") +A1 <- read.table("A3/quant.sf", comment.char="#") ind <- A1[,2] groundTruth <- read.table("../A3/transcriptNames.txt") sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1])) @@ -49,7 +49,7 @@ colnames(sailA1) <- c("id","true","est") for (i in 1:dim(groundTruth)[1]){ j <- which(sailNames[,2] == as.character(sailA1[i,1])) k <- which(A1[,1] == sailNames[j,1]) - sailA1[i,3] <- A1[k,7] + sailA1[i,3] <- A1[k,4] if(i%%1000 == 0){print(i)} } sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2])) @@ -58,7 +58,7 @@ sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3])) write.table(sailA1,file = "sailA3.txt") -A1 <- read.table("A4/quant_bias_corrected.sf") +A1 <- read.table("A4/quant.sf", comment.char="#") ind <- A1[,2] groundTruth <- read.table("../A4/transcriptNames.txt") sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1])) @@ -66,7 +66,7 @@ colnames(sailA1) <- c("id","true","est") for (i in 1:dim(groundTruth)[1]){ j <- which(sailNames[,2] == as.character(sailA1[i,1])) k <- which(A1[,1] == sailNames[j,1]) - sailA1[i,3] <- A1[k,7] + sailA1[i,3] <- A1[k,4] if(i%%1000 == 0){print(i)} } sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2])) @@ -74,7 +74,7 @@ sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3])) #smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2) write.table(sailA1,file = "sailA4.txt") -A1 <- read.table("A5/quant_bias_corrected.sf") +A1 <- read.table("A5/quant.sf", comment.char="#") ind <- A1[,2] groundTruth <- read.table("../A5/transcriptNames.txt") sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1])) @@ -82,7 +82,7 @@ colnames(sailA1) <- c("id","true","est") for (i in 1:dim(groundTruth)[1]){ j <- which(sailNames[,2] == as.character(sailA1[i,1])) k <- which(A1[,1] == sailNames[j,1]) - sailA1[i,3] <- A1[k,7] + sailA1[i,3] <- A1[k,4] if(i%%1000 == 0){print(i)} } sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2])) diff --git a/simulationScripts/set-path-variable.sh b/simulationScripts/set-path-variable.sh index 4f477df..ef8da59 100644 --- a/simulationScripts/set-path-variable.sh +++ b/simulationScripts/set-path-variable.sh @@ -11,7 +11,7 @@ anaconda/1.9.1 rsem-1.2.15/ express/express-1.5.1-linux_x86_64 tigar2 -export LD_LIBRARY_PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.6.3-Linux_x86-64/lib:$LD_LIBRARY_PATH -export PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.6.3-Linux_x86-64/bin:$PATH +export LD_LIBRARY_PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.7.3-Linux_x86-64/lib:$LD_LIBRARY_PATH +export PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.7.3-Linux_x86-64/bin:$PATH compilers/gcc/4.8.2