diff --git a/bin/RapClust b/bin/RapClust index eae743a..9c4b22a 100755 --- a/bin/RapClust +++ b/bin/RapClust @@ -3,7 +3,7 @@ import sys import os import logging import coloredlogs - +import itertools import click import argparse import yaml diff --git a/rapclust/eqnet.py b/rapclust/eqnet.py index c7c589e..5886e30 100644 --- a/rapclust/eqnet.py +++ b/rapclust/eqnet.py @@ -1,10 +1,8 @@ -from __future__ import print_function def buildNetFile(sampdirs, netfile, cutoff, auxDir, writecomponents=False): - import itertools import pandas as pd import numpy as np - import os + import os,itertools import logging logger = logging.getLogger("rapclust") @@ -22,7 +20,7 @@ def buildNetFile(sampdirs, netfile, cutoff, auxDir, writecomponents=False): numSamp = 0 tot = 0 eqClasses = {} - for sffile, eqfile in itertools.izip(sffiles, eqfiles): + for sffile, eqfile in zip(sffiles, eqfiles): quant = pd.read_table(sffile) quant.set_index('Name', inplace=True) @@ -32,13 +30,13 @@ def buildNetFile(sampdirs, netfile, cutoff, auxDir, writecomponents=False): numEq = int(ifile.readline().rstrip()) logging.info("quant file: {}; eq file: {}; # tran = {}; # eq = {}".format(sffile, eqfile, numTran, numEq)) if firstSamp: - for i in xrange(numTran): + for i in range(numTran): tnames.append(ifile.readline().rstrip()) diagCounts = np.zeros(len(tnames)) sumCounts = np.zeros(len(tnames)) ambigCounts = np.zeros(len(tnames)) else: - for i in xrange(numTran): + for i in range(numTran): ifile.readline() # for easy access to quantities of interest @@ -48,8 +46,8 @@ def buildNetFile(sampdirs, netfile, cutoff, auxDir, writecomponents=False): epsilon = np.finfo(float).eps sumCounts = np.maximum(sumCounts, estCount) - for i in xrange(numEq): - toks = map(int, ifile.readline().rstrip().split('\t')) + for i in range(numEq): + toks = list(map(int, ifile.readline().rstrip().split('\t'))) nt = toks[0] tids = tuple(toks[1:-1]) count = toks[-1] @@ -85,7 +83,7 @@ def buildNetFile(sampdirs, netfile, cutoff, auxDir, writecomponents=False): # Go through the weightMap and remove any edges that # have endpoints with too few mapping reads ## - for k,v in weightDict.iteritems(): + for k,v in weightDict.items(): c0, c1 = diagCounts[k[0]], diagCounts[k[1]] a0, a1 = ambigCounts[k[0]], ambigCounts[k[1]] if a0 + a1 > epsilon and a0 > cutoff and a1 > cutoff: @@ -119,8 +117,8 @@ def nearEnd(tup): for olfile in orphanLinkFiles: for l in open(olfile): left, right = l.rstrip().split(':') - lp = [map(int, i.split(',')) for i in left.rstrip('\t').split('\t')] - rp = [map(int, i.split(',')) for i in right.split('\t')] + lp = [list(map(int, i.split(','))) for i in left.rstrip('\t').split('\t')] + rp = [list(map(int, i.split(','))) for i in right.split('\t')] lp = [t[0] for t in filter(nearEnd, lp)] rp = [t[0] for t in filter(nearEnd, rp)] if len(lp) == 1 and len(rp) == 1: @@ -139,7 +137,7 @@ def nearEnd(tup): tnamesFilt = [] relabel = {} - for i in xrange(len(estCount)): + for i in range(len(estCount)): if (diagCounts[i] > cutoff): relabel[i] = len(tnamesFilt) tnamesFilt.append(tnames[i]) @@ -160,7 +158,7 @@ def nearEnd(tup): def writeEdgeList(weightDict, tnames, ofile, G): useGraph = G is not None - for k,v in weightDict.iteritems(): + for k,v in weightDict.items(): ofile.write("{}\t{}\t{}\n".format(tnames[k[0]], tnames[k[1]], v)) if useGraph: G.add_edge(tnames[k[0]], tnames[k[1]]) @@ -173,7 +171,7 @@ def writePajek(weightDict, tnames, relabel, ofile): ofile.write("{}\t\"{}\"\n".format(i, n)) ofile.write("*Edges\n") print("There are {} edges\n".format(len(weightDict))) - for k,v in weightDict.iteritems(): + for k,v in weightDict.items(): ofile.write("{}\t{}\t{}\n".format(relabel[k[0]], relabel[k[1]], v)) #ofile.write("{}\t{}\t{}\n".format(tnames[k[0]], tnames[k[1]], v)) #if k[0] != k[1]: @@ -202,15 +200,15 @@ def readEqClass(eqfile, eqCollection): print("file: {}; # tran = {}; # eq = {}".format(eqfile, numTran, numEq)) if not eqCollection.hasNames: tnames = [] - for i in xrange(numTran): + for i in range(numTran): tnames.append(ifile.readline().rstrip()) eqCollection.setNames(tnames) else: - for i in xrange(numTran): + for i in range(numTran): ifile.readline() - for i in xrange(numEq): - toks = map(int, ifile.readline().rstrip().split('\t')) + for i in range(numEq): + toks = list(map(int, ifile.readline().rstrip().split('\t'))) nt = toks[0] tids = tuple(toks[1:-1]) count = toks[-1] @@ -219,7 +217,7 @@ def readEqClass(eqfile, eqCollection): def getCountsFromEquiv(eqCollection): countDict = {} tn = eqCollection.tnames - for tids, count in eqCollection.eqClasses.iteritems(): + for tids, count in eqCollection.eqClasses.items(): for t in tids: if tn[t] in countDict: countDict[tn[t]] += count @@ -251,7 +249,7 @@ def filterGraph(expDict, netfile, ofile, auxDir): logger = logging.getLogger("rapclust") # Get just the set of condition names - conditions = expDict.keys() + conditions = list(expDict.keys()) logging.info("conditions = {}".format(conditions)) #for cond in conditions: @@ -273,7 +271,7 @@ def filterGraph(expDict, netfile, ofile, auxDir): eqClasses = {} for cond in conditions: print(expDict[cond]) - for sampNum, sampPath in expDict[cond].iteritems(): + for sampNum, sampPath in expDict[cond].items(): if cond not in eqClasses: eqClasses[cond] = EquivCollection() eqPath = os.path.sep.join([sampPath, auxDir, "/eq_classes.txt"]) @@ -290,7 +288,7 @@ def filterGraph(expDict, netfile, ofile, auxDir): numTrimmed = 0 with open(netfile) as f, open(ofile, 'w') as ofile: data = pd.read_table(f, header=None) - for i in tqdm(range(len(data))): + for i in tqdm(list(range(len(data)))): count += 1 #print("\r{} done".format(count), end="") #Alternative hypo @@ -322,6 +320,3 @@ def filterGraph(expDict, netfile, ofile, auxDir): else: numTrimmed += 1 logging.info("Trimmed {} edges".format(numTrimmed)) - - -