diff --git a/dna2vec/multi_k_model.py b/dna2vec/multi_k_model.py index 1d1513c..81f1131 100644 --- a/dna2vec/multi_k_model.py +++ b/dna2vec/multi_k_model.py @@ -4,20 +4,20 @@ import tempfile import numpy as np -from gensim.models import word2vec +from gensim.models import KeyedVectors from gensim import matutils class SingleKModel: def __init__(self, model): self.model = model - self.vocab_lst = sorted(model.vocab.keys()) + self.vocab_lst = sorted(model.index_to_key) class MultiKModel: def __init__(self, filepath): - self.aggregate = word2vec.Word2Vec.load_word2vec_format(filepath, binary=False) + self.aggregate = KeyedVectors.load_word2vec_format(filepath, binary=False) self.logger = logbook.Logger(self.__class__.__name__) - vocab_lens = [len(vocab) for vocab in self.aggregate.vocab.keys()] + vocab_lens = [len(vocab) for vocab in self.aggregate.index_to_key] self.k_low = min(vocab_lens) self.k_high = max(vocab_lens) self.vec_dim = self.aggregate.vector_size @@ -45,7 +45,7 @@ def l2_norm(self, vocab): return np.linalg.norm(self.vector(vocab)) def separate_out_model(self, k_len): - vocabs = [vocab for vocab in self.aggregate.vocab.keys() if len(vocab) == k_len] + vocabs = [vocab for vocab in self.aggregate.index_to_key if len(vocab) == k_len] if len(vocabs) != 4 ** k_len: self.logger.warn('Missing {}-mers: {} / {}'.format(k_len, len(vocabs), 4 ** k_len)) @@ -56,4 +56,4 @@ def separate_out_model(self, k_len): vec_str = ' '.join("%f" % val for val in self.aggregate[vocab]) print('{} {}'.format(vocab, vec_str), file=fptr) fptr.flush() - return SingleKModel(word2vec.Word2Vec.load_word2vec_format(fptr.name, binary=False)) + return SingleKModel(KeyedVectors.load_word2vec_format(fptr.name, binary=False)) diff --git a/requirements.txt b/requirements.txt index 14b5c6a..2f2128e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ biopython==1.68 boto==2.46.1 bz2file==0.98 ConfigArgParse==0.11.0 -gensim==0.13.2 +gensim==4.1.2 Logbook==1.0.0 numpy==1.16 
pep8==1.7.0 @@ -11,8 +11,8 @@ pluggy==0.4.0 py==1.4.33 pytest==3.0.7 python-dateutil==2.6.0 -requests==2.20.0 -scipy==0.19.0 +requests==2.20 +scipy==1.7.1 six==1.10.0 smart-open==1.5.1 tox==2.7.0