diff --git a/src/homeworks/group-project-em-canopy/CanEM.py b/src/homeworks/group-project-em-canopy/CanEM.py
new file mode 100644
index 0000000..f5b9c0a
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/CanEM.py
@@ -0,0 +1,118 @@
+'''
+Created on May 18, 2011
+
+@author: CanEM Team
+'''
+from mr_CanopyIterate import MrCanopy
+#from mr_AssignCentToCan import MrAssignCentToCan
+from mr_GMixEmInitialize import MrGMixEmInit
+from mr_GMixEmIterate import MrGMixEm
+import json
+from math import sqrt
+import os 
+
+def dist(x,y):
+    """Return the euclidean distance between x and y, indexed over len(x)."""
+    total = 0.0
+    for idx, xi in enumerate(x):
+        delta = xi - y[idx]
+        total += delta * delta
+    return sqrt(total)
+
+
+'''
+Canopy EM for gaussian mixture model.  
+Sequence of events:
+1.  Generate canopies with parameter t2   (mr_CanopyIterate.py)
+2.  Initialize with modified kmeans initializer (mr_GMixEmInitialize.py)
+3.  Generate 1/0 initial weight vector based on cluster membership (mr_GMixEmInitialize.py)
+4.  Run through calc to generate first set of phi, mu, sigma (probably sigma inverse) (mr_GMixEmInitialize.py)
+5.  Iteration - if a data entry is in the same canopy as a cluster's mean (determined by parameter t1),
+               then
+                    the mapper employs phi, mu, sigma calculated in the reducer to calc weights for input examples
+                    and generates partial sums for the phi, mu, sigma inverse calc.
+               otherwise
+                    the mapper directly assigns a very very small value as the weight and ignores this point when calculating
+                    partial sums for the phi, mu, sigma inverse calc
+
+'''
+
+def _read_text(path):
+    """Return the entire contents of the text file at path."""
+    with open(path) as handle:
+        return handle.read()
+
+
+def main():
+    """Drive canopy generation, EM initialization and EM iteration.
+
+    Runs MrCanopy over the input data and writes the resulting canopy
+    centers to canopylist.txt, runs MrGMixEmInit once, then reruns MrGMixEm
+    until the summed euclidean movement of the means between two
+    consecutive iterations is at most 0.01.  The parameters are read back
+    from intermediateResults.txt after every run; the means sit at index 1
+    of the JSON list stored there.
+
+    Raises:
+        RuntimeError: if the canopy job produced no output line.
+    """
+    #data path parameters
+    filePath = os.getcwd() + "/data/"
+    inputDataName = "input.txt"        #the dataset you want to run clustering on
+    intermediateDataName = "intermediateResults.txt"    #intermediate file for EM
+    canopyList = "canopylist.txt"          # list of canopy centers
+
+    print('Canopy-EM cluster by CanEM Team')
+
+    #Generate Canopies
+    print('Generating Canopies...')
+    canopies = None
+    mrJob0 = MrCanopy(args=[filePath + inputDataName])
+    with mrJob0.make_runner() as runner:
+        runner.run()
+        for line in runner.stream_output():
+            #only one key; so only one line
+            _, canopies = mrJob0.parse_output_line(line)
+    if canopies is None:
+        #without this check an empty job output surfaced as an unbound
+        #local variable error on the json.dumps call below
+        raise RuntimeError('MrCanopy produced no output; cannot write ' + canopyList)
+
+    #write canopies to file
+    with open(filePath + canopyList, 'w') as fileOut:
+        fileOut.write(json.dumps(canopies))
+
+    #Run the EM initializer to get starting centroids
+    print('Initializing...')
+    mrJob = MrGMixEmInit(args=[filePath + inputDataName])
+    with mrJob.make_runner() as runner:
+        runner.run()
+
+    #pull out the centroid values to compare with values after one iteration
+    paramJson = _read_text(filePath + intermediateDataName)
+
+    delta = 10
+    #Begin iteration on change in centroids
+    print('Iterating...')
+    while delta > 0.01:
+        #parse old centroid values
+        oldMeans = json.loads(paramJson)[1]
+
+        #run one iteration
+        mrJob2 = MrGMixEm(args=[filePath + inputDataName])
+        with mrJob2.make_runner() as runner:
+            runner.run()
+
+        #compare new centroids to old ones
+        paramJson = _read_text(filePath + intermediateDataName)
+        newMeans = json.loads(paramJson)[1]
+
+        delta = 0.0
+        for i in range(len(newMeans)):
+            delta += dist(newMeans[i], oldMeans[i])
+
+        print(delta)
+
+if __name__ == '__main__':  #run the driver only when executed as a script
+    main()
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/data/canopylist.txt b/src/homeworks/group-project-em-canopy/data/canopylist.txt
new file mode 100644
index 0000000..9ab20a4
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/data/canopylist.txt
@@ -0,0 +1 @@
+[[-0.86606271111399113, 2.4785892178040405, 2.048832479159195], [0.52493662840807942, 11.135944654392654, 7.5693031846013881], [1.3772621783821584, -2.5544339440627342, 1.2711656717435735], [1.4260688085321982, 0.018061462509647852, 4.2010718161054008], [5.7580804354507844, 8.8661994432020936, 9.0839416717510808], [5.7875723179664984, 2.0407563452592936, 8.1923326983433054], [6.7721586517684944, -1.7476887592774151, 5.4879186836056659]]
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/data/input.txt b/src/homeworks/group-project-em-canopy/data/input.txt
new file mode 100644
index 0000000..38cbb77
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/data/input.txt
@@ -0,0 +1,200 @@
+[5.7875723179664984, 2.0407563452592936, 8.1923326983433054]
+[1.8492517580532633, 8.7507222863878695, 8.4261279689317181]
+[5.7580804354507844, 8.8661994432020936, 9.0839416717510808]
+[-0.9647996612820231, -0.56986578299873869, 1.6402511642815365]
+[5.4519472612912594, 2.2296960334027585, 6.6452481972115365]
+[2.2835357773872649, 9.5834515166256988, 9.5277033001269604]
+[5.9178596151523388, 9.041252742525721, 9.0170263912426645]
+[-1.0983204427153335, -0.36377188171082864, 1.8593392381971416]
+[5.4611267469525675, 2.983548696992552, 6.6516616275848577]
+[0.31318406396728005, 9.7020669433537279, 9.2602899066391764]
+[7.3363819512567421, 8.2576855647705951, 8.3702041008873529]
+[2.1421530768633081, 0.3116636543395771, 1.423063211123057]
+[5.3172601894841778, 1.7839235499456993, 5.5058157871154796]
+[2.2632830063782512, 8.9992678096486056, 8.002258534788794]
+[6.0822195614339858, 8.9212468114619998, 10.577863411044518]
+[1.3772621783821584, -2.5544339440627342, 1.2711656717435735]
+[6.1313861705351123, 1.0540163594233787, 5.2525297293248006]
+[2.2966814199535386, 11.077872323669402, 9.1184319673456411]
+[8.138512325044438, 8.973065824451222, 11.062012984720656]
+[1.2199074827767888, 1.927145360089787, 1.5961194566163051]
+[4.0035442185526691, 1.9527253516368541, 6.0991541104079552]
+[1.1179673551902645, 9.7336197216752289, 6.894852068777392]
+[4.395840893704646, 8.900818974564249, 8.7650148580060261]
+[-1.4132855725887907, 0.85208111695028443, 2.5305908958792154]
+[5.3346512628853064, 1.9428190662561047, 5.7152004192549501]
+[0.69725380804318315, 8.9520692616153816, 8.5427885322575037]
+[8.1584067309228612, 9.0090035385883755, 10.343036621736838]
+[1.4967285350076551, 0.065581990502639853, 1.3549439980875957]
+[6.8777894630599885, 0.62209423195250579, 6.472569682245898]
+[0.95668072232654633, 7.6209018559743189, 8.985512632008458]
+[6.9944389268323164, 9.7378002999337561, 10.220988893328242]
+[1.4260688085321982, 0.018061462509647852, 4.2010718161054008]
+[4.8632691438857902, 2.6613226140226276, 6.7156449625085877]
+[0.48151739403603866, 7.7938823153165711, 7.8974514842655612]
+[7.2915245127304971, 9.8115570238105434, 9.0714843448828066]
+[1.4652095828152498, 0.44579377797449321, -0.07233465143837492]
+[6.812761336333832, 2.2238873665644703, 5.5576741723066778]
+[0.97682085111399086, 10.514953541165529, 7.1832949997082434]
+[6.8624594168595969, 8.0135036376169673, 9.3871429998064357]
+[0.015707131662096863, -0.30827442168313324, 1.8587417898365051]
+[7.9288898656061919, 1.8563933328360918, 6.9762008742675752]
+[1.4914860350356633, 11.500176607088751, 8.4190544994905565]
+[7.1280521973407582, 8.4792520984037498, 8.2575774130035917]
+[-0.26893400252118227, 0.37374870468451887, 0.0534514177478147]
+[6.0001607055528705, 1.2938023032821895, 6.3343044725326552]
+[3.7868695323031183, 8.909636986983033, 8.5203999174083531]
+[7.4765407775797339, 10.225726823165623, 7.7956626295425098]
+[-0.86606271111399113, 2.4785892178040405, 2.048832479159195]
+[5.342404750853313, 3.1789956704993765, 7.7544672726303885]
+[1.4824132389345142, 8.367691145563187, 8.8436898039346854]
+[9.334603708692363, 8.4671819479475658, 9.5733704552267351]
+[-1.5079498716051036, -0.045506510539089051, 2.0834398544168122]
+[7.9448369254002067, 1.7202217313763217, 6.2650739502680191]
+[1.6064528714674209, 7.2514446393023331, 8.6883489884610174]
+[8.4015816257346199, 9.6458906085041161, 7.427447019199203]
+[2.0228314972181538, -0.036919624927565675, 1.6419058486541978]
+[6.0238770962323178, 2.3093048145548352, 5.8059304690266016]
+[2.2600905947309968, 9.2184992735622, 7.8097520617893519]
+[5.7355200687261476, 9.7457264195500404, 8.3877351114515193]
+[0.68518272799590174, 1.5715059930326469, 2.5722247885445388]
+[8.2440750876389117, 2.5892744925738542, 4.5234815854385735]
+[0.66768328790205644, 9.7029069449721277, 8.6445782998719096]
+[7.7532838025725015, 10.312752740841205, 8.5236326907291922]
+[1.139871837810813, 0.42195347669474997, 2.3423897768509576]
+[5.6685010281386674, -0.036226222358152205, 5.940615105647451]
+[0.36034917521284582, 8.8027726177666015, 7.6954224144409036]
+[7.6179104250291187, 5.9720945326133048, 8.8064771572166745]
+[1.9861607820042351, -0.50955156098130372, 2.3269836026994288]
+[7.4049876556928513, 1.1328985089231141, 7.874232055607937]
+[3.1454296780647701, 9.46775818583132, 8.175649204677983]
+[7.1146056947462899, 10.604154757729102, 9.746645187231092]
+[2.0770026048296328, 3.634319484716845, 2.1322895501306354]
+[5.965435709749686, 2.8081277004594916, 8.0729588763106133]
+[2.1314228264393318, 10.817862453167258, 8.2079917175134192]
+[8.1793713600664102, 9.7828821938319983, 8.6801935772237524]
+[2.6870920100076563, 1.3761379972888728, 1.427326718400185]
+[5.124280147582021, 1.3680772330895516, 7.0703788180928102]
+[1.1886875679255082, 9.3687564082190722, 8.7811549751757312]
+[7.1440970699179749, 9.302079168839871, 7.649475933899919]
+[0.82520890902297761, 0.26596694563259882, 2.4265279623442284]
+[6.0261229050814347, 1.862005069025209, 7.1943247793777836]
+[2.8170157145299708, 9.9009625520246516, 7.1472002024513159]
+[6.4906722807759243, 7.7628293406847133, 10.593106311283872]
+[1.4417608281950198, 1.4310270592215055, 3.0545010363438383]
+[6.7721586517684944, -1.7476887592774151, 5.4879186836056659]
+[2.005804136238257, 7.2503121435147326, 7.2859261888553926]
+[7.4257852032119711, 8.8089941483513385, 7.5867556164727814]
+[0.54204360819774844, -0.07111192959564927, 3.6884780376091402]
+[4.5470368165534429, 2.9674215331202394, 6.0173478954870836]
+[1.6430191212638228, 10.454964576875005, 8.3755759059295993]
+[6.3417567003874709, 6.7741397330232127, 8.4566577563261287]
+[-0.31201899856273541, -0.42026923659078763, 2.7580161550180193]
+[5.0835614277792169, 1.0356196146148906, 7.4525611435889383]
+[2.0155029269759615, 9.1580588364568491, 9.0132777123343839]
+[5.3509981576460559, 10.577451889028499, 7.9478919836484172]
+[1.7158826088811572, -0.87825228199186456, 2.4446252180505583]
+[6.3223939720785536, 1.2972315514885251, 7.0541124546453124]
+[3.3183009864316837, 8.8486906002964982, 9.4035600375653541]
+[6.3864364141750434, 8.7503902508547942, 7.9178914050815772]
+[1.4120770974881269, -0.0071015027210956094, 2.8374648151126522]
+[6.2466036569977792, 0.41460163503891256, 7.8447975285968941]
+[1.6313456038341623, 9.4983297246237619, 7.3353028133828424]
+[5.3416808041333432, 10.727055327695645, 9.3211786304791016]
+[1.5666888829628081, 1.6040178781057397, 3.1338113380637793]
+[4.6019764904785703, -0.038260405210230886, 6.0834025521761212]
+[0.59369786378995704, 7.719427245596882, 8.3773258288317685]
+[5.7519061601592254, 9.145999596091011, 9.8272118109997475]
+[-0.54765088369338655, -0.45733939241461186, 1.6975849454574181]
+[7.3048708211552178, 2.5336839273582754, 8.1755914939979704]
+[3.3069847262455081, 9.2790457273604119, 8.0046740167653248]
+[7.4233294337624924, 9.9559576851717004, 7.7553310842637941]
+[2.0191584930600861, -0.63654961771361984, 2.463648917862352]
+[7.0335757691389205, 2.5151057979855369, 6.5465983045702032]
+[2.1587307649466752, 9.8990566338321244, 7.1986859280706019]
+[5.2478826276237136, 9.2965339316970681, 8.4786265437582156]
+[1.2321859515518465, -0.68336790945647841, 3.4478222029690966]
+[4.9514320395520706, 0.018683723869228563, 5.8612850613880259]
+[2.1889424743933583, 9.4005923991608338, 8.3424081823260554]
+[7.6696603443217235, 9.2433947737673883, 10.158288992950526]
+[-1.6184973142864489, 0.3394090624324988, 1.92760450911888]
+[4.7305323633250467, 1.5039096001554173, 7.4279883476595838]
+[0.52493662840807942, 11.135944654392654, 7.5693031846013881]
+[8.4034792827698421, 10.179008123060433, 8.908276333038506]
+[2.212493857702051, 0.76708947799844962, 1.5450908273723785]
+[6.3902352222114329, 2.1034297261763886, 6.438740791395106]
+[4.0789195767931288, 9.0919211657806223, 7.6757374595360419]
+[6.8736078326856545, 9.0005854347205219, 9.6864756349170591]
+[0.31693697594633863, 1.403936833188951, 1.604478183723121]
+[5.7326922412547523, 2.7248747993458777, 6.9437143839524058]
+[2.1304676694185023, 8.6461128495709456, 7.8180605511848889]
+[6.8385812582345604, 7.1233820211367957, 9.8346130646878311]
+[1.1470502014491077, 2.0455818853395185, 1.5980395875327971]
+[2.9630154990666027, -0.65138619573957324, 6.6078990781189422]
+[2.0213196000847868, 9.2274943952837987, 6.4318494545791474]
+[6.2790622810712664, 8.2385118690709191, 8.9295616228800725]
+[0.94434454122599842, 0.93145103630859338, 1.7552815845385414]
+[6.0747483226065615, 1.7703377690703817, 7.036631601398657]
+[0.93674737273367525, 8.0884420035553202, 7.7454963101822267]
+[7.6890359178720384, 9.3272858598415365, 8.7752340624546719]
+[-0.75459315195634757, 1.0421655903758564, 0.29708873929426094]
+[5.9547711826175194, 1.4842849696926446, 7.4279257480674028]
+[1.0341106111370226, 9.0298386276522518, 8.3599845827062964]
+[8.9986077040595998, 9.0791089481238032, 8.3834818658126693]
+[1.8809995135383482, 1.9008154978719334, 1.7627151554626879]
+[5.7866930841691646, 0.87015324923171045, 7.4112162191327036]
+[3.7044545558658681, 10.506907192830052, 7.5505897703282399]
+[8.6527726098204525, 9.0458516102240907, 9.2664408892830448]
+[1.9018276482480221, 2.063714856627795, 1.6045231069806056]
+[5.9018847173118498, 1.6862098991130774, 6.0377589804721126]
+[1.2962380839934469, 10.101170222783974, 7.8265069199949586]
+[7.0633920119343383, 9.008315108499378, 8.4429509584039799]
+[0.90760344899479939, -0.31064717361203387, 1.1369815860053496]
+[5.7199798098790389, 1.3424294552651099, 5.2576416317613619]
+[3.2316574431166893, 10.46252881973081, 6.8912153778452065]
+[7.3470929474803928, 9.9790417585052573, 8.995377304744677]
+[-0.027777371815487728, 0.94363607925718485, 2.6986566574572324]
+[5.8584381289619456, 1.4192446853943124, 8.3771650469931611]
+[2.1690371908011898, 8.7082521768453347, 8.2961749057920819]
+[7.8504838635229204, 8.6856251707392023, 7.4176845820386994]
+[-0.24674775206134825, -0.02462228588349813, 2.2288520579098132]
+[5.7637342801602545, 1.6455808827358347, 5.7317239321473545]
+[0.01210685609654405, 11.360616649312488, 8.5851624063324827]
+[6.853687830806984, 10.465036219457282, 9.0745041859558082]
+[1.0535994341971358, 2.0751987863273449, 0.82127384888914234]
+[5.781079642062064, 0.94648842736451344, 5.7461598484560987]
+[0.33474625056653395, 9.1061661520229951, 9.5292964916256278]
+[7.6676510432430804, 9.2127660158230213, 6.6309722923215997]
+[-0.39480362747664466, 0.85124623086196483, 1.5445112515339587]
+[6.3745803892614328, 1.473796250587353, 6.3410398954712957]
+[2.3502095194157273, 10.252700296353535, 7.1808685108147072]
+[8.0341726154392461, 7.3138140919160675, 8.3812716550574891]
+[-0.16684493542666928, 0.49657675813955637, 1.8757094164375783]
+[5.2108218384847147, 0.73825670499718898, 6.2341725627555604]
+[2.6242307709517765, 9.3458838907513417, 8.6106642310203672]
+[7.016050623298395, 9.0252857944135165, 7.8757170934483174]
+[-0.35529801152002138, 0.51866738296312009, 1.0854221760262766]
+[6.3614465724821621, 1.6353762278526245, 6.8947994623788462]
+[2.3165544921960932, 9.4453482704881448, 8.2706576895427109]
+[6.5473083834801491, 7.9873129672362424, 7.4731656038867067]
+[1.526158600791925, 0.2738434862268273, 1.3003768541021787]
+[6.4999279999144006, 2.513424558789811, 5.9311731664298337]
+[0.75413542735571837, 9.1056098014213838, 9.9956286695072887]
+[5.3663270437960504, 7.3968342213207556, 7.5424583982384288]
+[-0.64378984531077266, 1.0875755934804177, 0.70577698684384449]
+[5.5196721527242056, 1.5116929360566957, 8.0219723182565055]
+[1.9702474007434552, 9.7559220426155449, 7.3676670003678817]
+[7.0883262151693982, 9.0602537840321844, 8.1712903832007715]
+[-0.44332736710818688, 0.18455311920074147, 0.80817906296285125]
+[6.2257283035353943, 1.3174885460203565, 4.248113419472678]
+[2.8358834268335742, 9.6408504911374244, 7.4478488278119013]
+[7.9676398133356701, 7.6326290057279529, 9.6345551616575218]
+[1.8160715089488122, 0.14232757870136392, -0.17175844125882689]
+[6.1628755570672951, 1.5266458139161561, 4.3620647521239704]
+[0.58512283817250954, 9.9073643301140013, 8.1559893921268181]
+[7.5791843564891801, 9.862213120796298, 7.7210421860153922]
+[-0.86691547569314009, 0.53448000256716532, 3.3824088336091802]
+[6.8485992100899846, 2.5660848592791443, 6.4659675758886506]
+[3.0751329049717082, 8.8107193118627993, 8.0638888257012908]
+[7.8969938144433023, 11.080204191498879, 10.581530187458654]
+[0.074040303413835939, 0.074848468308284288, 1.9496653081904614]
diff --git a/src/homeworks/group-project-em-canopy/data/intermediateResults.txt b/src/homeworks/group-project-em-canopy/data/intermediateResults.txt
new file mode 100644
index 0000000..eb1479c
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/data/intermediateResults.txt
@@ -0,0 +1 @@
+[[0.50000491608284181, 0.24526628655517746, 0.055471250152603666, 0.19925754720937697], [[3.2823215953503295, 1.0528967294559894, 4.198057125109961], [7.1366289904711575, 9.039174250018748, 8.8105297921176096], [3.0478846588790915, 9.3152032812745809, 8.1908102807946079], [1.5403033979826148, 9.3931991480792938, 8.1533696604393295]], [[[0.48336601373346044, -0.15173049896849466, -0.44010948488852386], [-0.1517304989684948, 1.0238966993930796, -0.054579789795480019], [-0.44010948488852375, -0.054579789795480158, 0.59733888309658412]], [[1.0546734443117651, -0.026196110454155021, -0.044164317113212316], [-0.026196110454155024, 0.88887263162663066, -0.018857400777796081], [-0.044164317113212309, -0.018857400777796081, 0.99714498320608613]], [[7.6908233545328946, 24.465684423960923, 7.7489189962622618], [24.465684423960923, 101.39005840140184, 34.426074387092193], [7.7489189962622609, 34.426074387092193, 14.259405279982722]], [[1.4326116127364705, -0.087824516689787815, 0.51499800194303413], [-0.087824516689787829, 0.87440472077748688, 0.098049824806152958], [0.51499800194303424, 0.098049824806152958, 1.8158912625189798]]]]
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/inputGen.py b/src/homeworks/group-project-em-canopy/inputGen.py
new file mode 100644
index 0000000..43ff114
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/inputGen.py
@@ -0,0 +1,76 @@
+'''
+Created on Mar 18, 2011
+
+@author: mike-bowles
+'''
+
+
+from numpy import random
+import json
+import os
+
+#Generate a synthetic clustering dataset.  ncenters cluster centers are drawn
+#uniformly from [0,10) in each of ndim dimensions; then, npoints times over,
+#one sample of unit-variance gaussian noise around every center is written
+#to <cwd>/data/input.txt as one JSON list per line.
+
+pathname = os.getcwd() + "/data/"
+filename = "input.txt"
+
+ncenters = 4
+ndim = 3
+npoints = 50
+
+centers = []
+for i in range(ncenters):
+    c = [0.0]*ndim
+    for j in range(ndim):
+        c[j] = 10*random.uniform()
+    centers.append(c)
+
+print(centers)
+
+#the with-block closes the file even if sampling or serialization fails
+with open(pathname+filename,"w") as fileOut:
+    #samples are interleaved: one point per center on every outer pass
+    for i in range(npoints):
+        for j in range(ncenters):
+            x = [0.0]*ndim
+            for k in range(ndim):
+                x[k] = random.normal(centers[j][k],1.0,1)[0]
+            fileOut.write(json.dumps(x) + "\n")
+
+
+
diff --git a/src/homeworks/group-project-em-canopy/mr_CanopyIterate.py b/src/homeworks/group-project-em-canopy/mr_CanopyIterate.py
new file mode 100644
index 0000000..a3e43bb
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/mr_CanopyIterate.py
@@ -0,0 +1,99 @@
+'''
+Created on Apr 18, 2011
+
+
+'''
+from mrjob.job import MRJob
+
+from math import sqrt  #, exp, pow,pi
+from numpy import zeros, shape, random, array, zeros_like, dot, linalg
+import json
+import os
+
+def dist(x,y):
+    """Return the euclidean distance between x and y, indexed over len(x)."""
+    acc = 0.0
+    for pos, xv in enumerate(x):
+        diff = xv - y[pos]
+        acc += diff * diff
+    return sqrt(acc)
+
+
+#def gauss(x, mu, P_1):
+#    xtemp = x - mu
+#    n = len(x)
+#    p = exp(- 0.5*dot(xtemp,dot(P_1,xtemp)))
+#    detP = 1/linalg.det(P_1)
+#    p = p/(pow(2.0*pi,n/2.0)*sqrt(detP))
+#    return p
+
+class MrCanopy(MRJob):
+    """Single-pass canopy-center selection as a map/reduce job.
+
+    Each mapper greedily keeps the input points that lie farther than
+    0.8*t2 from every center it has kept so far and emits them under the
+    single key 1.  The reducer repeats the greedy selection over all emitted
+    candidates with the full t2 radius and emits the final list once.
+    """
+    DEFAULT_PROTOCOL = 'json'
+
+    def __init__(self, *args, **kwargs):
+        super(MrCanopy, self).__init__(*args, **kwargs)
+        #candidate centers kept so far by this job instance's mapper
+        self.canopyCenters = []
+
+    def configure_options(self):
+        """Register the pass-through command line options of the job."""
+        super(MrCanopy, self).configure_options()
+
+        self.add_passthrough_option(
+            '--k', dest='k', default=4, type='int',
+            help='k: number of densities in mixture')
+        self.add_passthrough_option(
+            '--t2', dest='t2', default=3.5, type='float',
+            help='t2: inner circle distance')
+        self.add_passthrough_option(
+            '--pathName', dest='pathName', default=os.getcwd()+'/data/', type='str',
+            help='pathName: pathname where intermediateResults.txt is stored')
+
+    def _isCovered(self, point, centers, radius):
+        """Return True if point lies within radius of any entry of centers."""
+        for center in centers:
+            if dist(array(point), center) <= radius:
+                #stop at the first covering center; the rest cannot change the answer
+                return True
+        return False
+
+    def mapper(self, key, val):
+        """Emit (1, x) when the JSON-encoded point val starts a new canopy."""
+        x = json.loads(val)
+        #use a value smaller than t2: the tighter radius keeps more
+        #candidates, which the reducer then prunes with the real t2
+        if not self._isCovered(x, self.canopyCenters, self.options.t2*0.8):
+            self.canopyCenters.append(x)
+            yield 1, x
+
+    def reducer(self, key, xs):
+        """Merge all candidate centers into the final canopy-center list."""
+        canopyCentersReducer = []
+        for x in xs:
+            #use real t2
+            if not self._isCovered(x, canopyCentersReducer, self.options.t2):
+                canopyCentersReducer.append(x)
+        yield 1, canopyCentersReducer
+        
+
+if __name__ == '__main__':  #invoke the job's run() only when executed as a script
+    MrCanopy.run()
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/mr_CanopyIterate.pyc b/src/homeworks/group-project-em-canopy/mr_CanopyIterate.pyc
new file mode 100644
index 0000000..1364e4d
Binary files /dev/null and b/src/homeworks/group-project-em-canopy/mr_CanopyIterate.pyc differ
diff --git a/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.py b/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.py
new file mode 100644
index 0000000..ad066f4
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.py
@@ -0,0 +1,102 @@
+'''
+Created on Apr 18, 2011
+
+'''
+from mrjob.job import MRJob
+
+from numpy import mat, zeros, shape, random, array, zeros_like, dot, linalg
+from random import sample
+import json
+from math import pi, sqrt, exp, pow
+import os
+
+class MrGMixEmInit(MRJob):
+    """Pick starting parameters for the gaussian-mixture EM iteration.
+
+    The mapper forwards the leading input records; the reducer samples k of
+    them as initial means, derives one shared diagonal covariance from the
+    spread of those means, sets uniform priors and writes
+    [phi, means, inverse covariances] as JSON to
+    <pathName>intermediateResults.txt.
+    """
+    DEFAULT_PROTOCOL = 'json'
+
+    def __init__(self, *args, **kwargs):
+        super(MrGMixEmInit, self).__init__(*args, **kwargs)
+
+        self.numMappers = 1     #number of mappers
+        self.count = 0          #records forwarded so far by the mapper
+
+    def configure_options(self):
+        """Register the pass-through command line options of the job."""
+        super(MrGMixEmInit, self).configure_options()
+        self.add_passthrough_option(
+            '--k', dest='k', default=4, type='int',
+            help='k: number of densities in mixture')
+        self.add_passthrough_option(
+            '--pathName', dest='pathName', default= os.getcwd()+'/data/', type='str',
+            help='pathName: pathname where intermediateResults.txt is stored')
+
+    def mapper(self, key, xjIn):
+        """Forward the leading records of the input under the single key 1."""
+        #something simple to grab random starting point: the counter is
+        #tested with <=, so 2k+1 records are forwarded before the cut-off
+        if self.count <= 2*self.options.k:
+            self.count += 1
+            yield (1,xjIn)
+
+    def reducer(self, key, xjIn):
+        """Sample k starting means and write the initial model to disk.
+
+        Every incoming record is also re-emitted unchanged under key 1.
+
+        Raises:
+            ValueError: from random.sample when fewer than k records arrive.
+            numpy.linalg.LinAlgError: from linalg.inv when the diagonal
+                covariance of the sampled means is singular.
+        """
+        k = self.options.k
+
+        #accumulate the forwarded data points and pull out k of them as starting point
+        cent = []
+        for xj in xjIn:
+            cent.append(json.loads(xj))
+            yield 1, xj
+        cent2 = [cent[i] for i in sample(range(len(cent)), k)]
+
+        #use the covariance of the selected centers as the starting guess for covariances
+        #first, calculate mean of centers
+        mean = array(cent2[0])
+        for c in cent2[1:]:
+            mean = mean + array(c)
+        mean = mean/float(k)
+
+        #then accumulate the squared deviations; only the diagonal is filled
+        ndim = len(mean)
+        cov = zeros((ndim,ndim),dtype=float)
+        for x in cent2:
+            xmm = array(x) - mean
+            for i in range(ndim):
+                cov[i,i] += xmm[i]*xmm[i]
+        cov = cov/float(k)
+        covInv = linalg.inv(cov)
+
+        #every cluster starts from the same inverse covariance
+        cov_1 = [covInv.tolist()]*k
+
+        #also need a starting guess at the phi's - prior probabilities
+        #initialize them all with the same number - 1/k - equally probable for each cluster
+        phi = [1.0/float(k)]*k
+
+        #form output object
+        jsonOut = json.dumps([phi, cent2, cov_1])
+
+        #write new parameters to file; the with-block closes it even on a failed write
+        fullPath = self.options.pathName + 'intermediateResults.txt'
+        with open(fullPath,'w') as fileOut:
+            fileOut.write(jsonOut)
+
+if __name__ == '__main__':  #invoke the job's run() only when executed as a script
+    MrGMixEmInit.run()
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.pyc b/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.pyc
new file mode 100644
index 0000000..80f07f1
Binary files /dev/null and b/src/homeworks/group-project-em-canopy/mr_GMixEmInitialize.pyc differ
diff --git a/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.py b/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.py
new file mode 100644
index 0000000..9e0bfa4
--- /dev/null
+++ b/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.py
@@ -0,0 +1,195 @@
+'''
+Created on Apr 18, 2011
+
+'''
+from mrjob.job import MRJob
+
+from math import sqrt, exp, pow,pi
+from numpy import zeros, shape, random, array, zeros_like, dot, linalg, add
+import json
+import os
+
+
+def dist(x,y):
+    #euclidean distance between two lists    
+    sum = 0.0
+    for i in range(len(x)):
+        temp = x[i] - y[i]
+        sum += temp * temp
+    return sqrt(sum)
+
+def gauss(x, mu, P_1):
+    xtemp = x - mu
+    n = len(x)
+    p = exp(- 0.5*dot(xtemp,dot(P_1,xtemp)))
+    detP = 1/linalg.det(P_1)
+    p = p/(pow(2.0*pi,n/2.0)*sqrt(detP))
+    return p
+
+class MrGMixEm(MRJob):
+    DEFAULT_PROTOCOL = 'json'
+    
+    def __init__(self, *args, **kwargs):
+        super(MrGMixEm, self).__init__(*args, **kwargs)
+        
+        fullPath = self.options.pathName + 'intermediateResults.txt'
+        fileIn = open(fullPath)
+        inputJson = fileIn.read()
+        fileIn.close()
+        inputList = json.loads(inputJson)
+        temp = inputList[0]        
+        self.phi = array(temp)           #prior class probabilities
+        temp = inputList[1]
+        self.means = array(temp)         #current means list
+        temp = inputList[2]
+        self.cov_1 = array(temp)         #inverse covariance matrices for w, calc.
+        #accumulate partial sums                               
+        #sum of weights - by cluster
+        self.new_phi = zeros_like(self.phi)        #partial weighted sum of weights
+        self.new_means = zeros_like(self.means)
+        self.new_cov = zeros_like(self.cov_1)
+        
+        self.numMappers = 1             #number of mappers
+        self.count = 0                  #passes through mapper
+        
+        #import Canopy list
+        canopyListPath= self.options.pathName + 'canopylist.txt'
+        fileIn = open(canopyListPath)
+        inputJson = fileIn.read()
+        fileIn.close()
+        self.canopyList = json.loads(inputJson)
+        
+        self.membership=[]            #assign means to canopy
+        
+#        print self.canopyList[1]
+#        print self.means
+#        jDebug = json.dumps([self.canopyList,self.means])    
+#        debugPath = self.options.pathName + 'debug2.txt'
+#        fileOut = open(debugPath,'w')
+#        fileOut.write(jDebug)
+#        fileOut.close()
+        
+        
+        for can in self.canopyList:
+            ismember=zeros(self.options.k)
+            i=0
+            for meanval in self.means:
+                #print can
+                #print meanval
+                if dist(array(can),meanval)<self.options.t1:
+                    ismember[i]=1
+                i=i+1
+            #print ismember
+            self.membership.append(ismember)
+        #print self.membership
+                  
+        
+                                                 
+    def configure_options(self):
+        super(MrGMixEm, self).configure_options()
+
+        self.add_passthrough_option(
+            '--k', dest='k', default=4, type='int',
+            help='k: number of densities in mixture')
+        self.add_passthrough_option(
+            '--t1', dest='t1', default=10.0, type='float',
+            help='t1: out circle distance')
+        self.add_passthrough_option(
+            '--pathName', dest='pathName', default=os.getcwd()+'/data/', type='str',
+            help='pathName: pathname where intermediateResults.txt is stored')
+        
+    def mapper(self, key, val):
+        #accumulate partial sums for each mapper
+        xList = json.loads(val)
+        x = array(xList)
+        
+        samecanopy= zeros(self.options.k)
+        i=0
+        for can in self.canopyList:
+            if dist(array(can),x)<self.options.t1:
+                samecanopy=add(samecanopy,self.membership[i])     
+            i=i+1
+        
+        
+        wtVect = zeros_like(self.phi)
+        for i in range(self.options.k):
+            wtVect[i]=0.000001
+            if samecanopy[i]>0: wtVect[i] = self.phi[i]*gauss(x,self.means[i],self.cov_1[i])
+                
+        wtSum = sum(wtVect)
+        wtVect = wtVect/wtSum
+        #accumulate to update est of probability densities.
+        #increment count
+        self.count += 1
+        #accumulate weights for phi est
+        self.new_phi = self.new_phi + wtVect
+        for i in range(self.options.k):
+            if samecanopy[i]>0:
+                #accumulate weighted x's for mean calc
+                self.new_means[i] = self.new_means[i] + wtVect[i]*x
+                #accumulate weighted squares for cov estimate
+                xmm = x - self.means[i]
+                covInc = zeros_like(self.new_cov[i])
+            
+                for l in range(len(xmm)):
+                    for m in range(len(xmm)):
+                        covInc[l][m] = xmm[l]*xmm[m]
+                self.new_cov[i] = self.new_cov[i] + wtVect[i]*covInc
+                    
+        
+        #dummy yield - real output passes to mapper_final in self
+        if False: yield 1,2
+        
+    def mapper_final(self):
+        
+        out = [self.count, (self.new_phi).tolist(), (self.new_means).tolist(), (self.new_cov).tolist()]
+        jOut = json.dumps(out)        
+        
+        yield 1,jOut
+    
+    
+    def reducer(self, key, xs):
+        #accumulate partial sums
+        first = True        
+        #accumulate partial sums
+        for val in xs:
+            if first:
+                temp = json.loads(val)
+                
+               
+                
+                totCount = temp[0]
+                totPhi = array(temp[1])
+                totMeans = array(temp[2])
+                totCov = array(temp[3])                
+                first = False
+            else:
+                temp = json.loads(val)
+                totCount = totCount + temp[0]
+                totPhi = totPhi + array(temp[1])
+                totMeans = totMeans + array(temp[2])
+                totCov = totCov + array(temp[3])
+        #finish calculation of new probability parameters
+        newPhi = totPhi/totCount
+        #initialize these to something handy to get the right size arrays
+        newMeans = totMeans
+        newCov_1 = totCov
+        for i in range(self.options.k):
+            newMeans[i,:] = totMeans[i,:]/totPhi[i]
+            tempCov = totCov[i,:,:]/totPhi[i]
+            #almost done.  just need to invert the cov matrix.  invert here to save doing a matrix inversion
+            #with every input data point.
+            newCov_1[i,:,:] = linalg.inv(tempCov)
+        
+        outputList = [newPhi.tolist(), newMeans.tolist(), newCov_1.tolist()]
+        jsonOut = json.dumps(outputList)
+        
+        #write new parameters to file
+        fullPath = self.options.pathName + 'intermediateResults.txt'
+        fileOut = open(fullPath,'w')
+        fileOut.write(jsonOut)
+        fileOut.close()
+        if False: yield 1,2
+
+if __name__ == '__main__':  #only launch the job when this file is executed as a script
+    MrGMixEm.run()
\ No newline at end of file
diff --git a/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.pyc b/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.pyc
new file mode 100644
index 0000000..cefa115
Binary files /dev/null and b/src/homeworks/group-project-em-canopy/mr_GMixEmIterate.pyc differ