-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparse_fischer.py
More file actions
71 lines (56 loc) · 2.28 KB
/
parse_fischer.py
File metadata and controls
71 lines (56 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import csv
import logging
import multiprocessing as mp
import os
import time
from collections import defaultdict
from random import sample as random_sample
import click
import numpy as np
import pandas as pd
import pyomo.environ as aml
import pyomo.kernel as pmo
import Fred2
import utilities
from Fred2.Core import (Allele, Peptide, Protein,
generate_peptides_from_proteins)
from Fred2.Core.Peptide import Peptide
from Fred2.EpitopePrediction import (EpitopePredictionResult,
EpitopePredictorFactory)
from Fred2.IO import FileReader
from team_orienteering_ilp import TeamOrienteeringIlp
# Module-wide logger handle. It stays None until main() assigns the object
# returned by utilities.init_logging().
LOGGER = None
@click.command()
@click.argument('input-peptides', type=click.Path())
@click.argument('input-vaccine-sequences', type=click.Path())
@click.argument('output-vaccine-epitopes', type=click.Path())
@click.option('--verbose', '-v', is_flag=True, help='Print debug messages')
@click.option('--log-file', '-l', type=click.Path(), help='Where to store the logs')
def main(input_peptides, input_vaccine_sequences, output_vaccine_epitopes, verbose, log_file):
    ''' Reads the vaccine produced by Fischer's online tool and converts it into epitopes
    '''
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the detailed description is kept in comments instead.
    #
    # input_peptides:          CSV file with a 'peptide' column; its values
    #                          form the set of known peptides.
    # input_vaccine_sequences: FASTA file with the mosaic sequence(s), loaded
    #                          through FileReader.read_fasta as Protein objects.
    # output_vaccine_epitopes: CSV file to write, with the columns
    #                          'cocktail', 'index' and 'epitope'.
    # verbose, log_file:       forwarded to utilities.init_logging.
    global LOGGER
    LOGGER = utilities.init_logging(verbose, log_file, log_append=False)

    LOGGER.info('Reading peptides...')
    with open(input_peptides) as peptides_file:
        known_peptides = {row['peptide'] for row in csv.DictReader(peptides_file)}
    LOGGER.info('Read %d peptides', len(known_peptides))

    LOGGER.info('Reading vaccine...')
    mosaics = FileReader.read_fasta(input_vaccine_sequences, in_type=Protein)
    LOGGER.info('Vaccine has %d mosaic(s)', len(mosaics))

    with open(output_vaccine_epitopes, 'w') as out_file:
        out = csv.writer(out_file)
        out.writerow(('cocktail', 'index', 'epitope'))

        for cocktail, mosaic in enumerate(mosaics):
            recognized = 0
            unknown = 0

            # Slide a window of length 9 over the mosaic, one position at a
            # time; a mosaic shorter than 9 yields no windows at all.
            window_count = len(mosaic) - 8
            for start in range(window_count):
                window = mosaic[start:start + 9]
                assert len(window) == 9

                if window not in known_peptides:
                    unknown += 1
                    continue

                # 'cocktail' is the zero-based mosaic number and 'index' is
                # the running count of recognized windows in this mosaic.
                out.writerow((cocktail, recognized, window))
                recognized += 1

            # The log line numbers mosaics from 1, unlike the CSV column.
            LOGGER.info('Mosaic %d - Recognized: %d Unknown %d', cocktail + 1, recognized, unknown)
# Run the click command only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()