-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunner.py
More file actions
163 lines (131 loc) · 6.18 KB
/
runner.py
File metadata and controls
163 lines (131 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import sys
import matplotlib.pyplot as plt
import project_functions as pf
import project_algorthims as pa
import monty
from cluster2d import cluster_svm as cluster
import numpy as np
import plottings
# Test the gating algorithm
# Escape sequences used to print the bold section headings.
BOLD = '\033[1m'
RESET = '\033[0m'

# Baseline number of questions before gold; every "change" is measured against it.
BASE_QUESTIONS = 200

# Number of repetitions performed when "run-loop" is passed on the command line.
LOOP_RUNS = 30

# Columns of the per-user ``questions`` matrix, in the order they are filled.
COL_3_CLUSTER = 4   # pa.centroidThreshold with the 3-cluster centroids
COL_2_CLUSTER = 5   # pa.centroidThreshold with the 2-cluster centroids
ALGORITHM_HEADER = "StepWise StepWisePenalty Attenuated AttenuatedContinous 3Cluster 2Cluster"


def requested_batch(argv):
    """Return the batch number given as ``<data> batch <n>`` in *argv*.

    Returns ``None`` when the third argument is not the word ``batch``, when
    the number is missing, or when it is not an integer, so the caller can
    fall back to its default batch instead of crashing.
    """
    if len(argv) > 3 and argv[2] == "batch":
        try:
            return int(argv[3])
        except ValueError:
            return None
    return None


def batch_profit(question_counts, coef_a=.000225, coef_b=.0000066):
    """Return the summed profit for an iterable of per-user question counts.

    Per count ``q`` the profit is
    ``0.0348*q - (0.0008*q*22 + coef_a*q + coef_b*q)``.
    The coefficients are the ones the original script attributes to
    Samasource profit measurements; the meaning of each individual
    coefficient is not documented in this file.
    """
    return sum(
        (0.0348 * q) - ((.0008 * q * 22) + (coef_a * q) + (coef_b * q))
        for q in question_counts
    )


def gold_savings(avg_change, base=BASE_QUESTIONS):
    """Return ``(less_gold, savings)`` for an average change in questions before gold.

    ``less_gold`` is ``20/(avg_change + base)*300000 - 20/base*300000`` and
    ``savings`` is ``less_gold * .03``.  Float literals are used so the
    baseline term is not truncated to 0 by Python 2 integer division.
    """
    less_gold = ((20.0 / (avg_change + base)) * 300000) - ((20.0 / base) * 300000)
    return less_gold, less_gold * .03


def run_once(perfMat, regressionData, base=BASE_QUESTIONS):
    """Cluster the users, run all six gating algorithms and print the results.

    *perfMat* holds one row per user; the script's own comment describes its
    columns as UserID, Past Performance, Time Performance, Current Score.
    Returns ``(questions, profit_3, profit_2, profit_norm, average)`` where
    ``questions`` is the users x 6 matrix of questions before gold and
    ``average`` is the per-algorithm mean change relative to *base*.
    """
    # Calculate centroids; these act as the thresholds for the cluster algorithms.
    #initial_centroids = np.array(([-0.5,0.5],[0,0],[0.5,-0.5]))
    #initial_centroids2 = np.array(([-0.25,0.25],[0.25,-0.25]))
    centroids = cluster(perfMat[:, 1], perfMat[:, 2], 3,
                        "Past Score (Normalized)", "Average Time (Normalized)", False)
    centroids2 = cluster(perfMat[:, 1], perfMat[:, 2], 2,
                         "Past Score (Normalized)", "Average Time (Normalized)", False)
    print(" ")
    print(BOLD + "Centroid Cutoffs" + RESET)
    print("3 Cluster Centroids")
    print(centroids)
    print("2 Cluster Centroids")
    print(centroids2)

    # Regression weights used by every algorithm.
    weights = {"Current": 1}
    weights["Time"], weights["Past"] = pf.weightRegressions(regressionData)
    print(" ")
    print(BOLD + "Algorthim weights:" + RESET)
    print("Time Weight:" + str(weights["Time"]))
    print("Past Score Weight:" + str(weights["Past"]))

    # With our weights and centroids compute every user's estimated number of
    # questions before gold using each of the 6 algorithms.
    n_users = len(perfMat[:, 1])
    questions = np.zeros((n_users, 6))
    for u in range(n_users):
        alg_params = [perfMat[u, 1], perfMat[u, 3], perfMat[u, 2], .98, centroids, weights, base]
        # Old algorithms
        questions[u, 0] = pa.StepWise(*alg_params)
        questions[u, 1] = pa.StepWisePenalty(*(alg_params + [0.5]))
        questions[u, 2] = pa.Attenuated(*alg_params)
        questions[u, 3] = pa.AttenuatedContinous(*alg_params)
        # Attenuated cluster algorithms of interest
        questions[u, COL_3_CLUSTER] = pa.centroidThreshold(*alg_params)
        questions[u, COL_2_CLUSTER] = pa.centroidThreshold(
            perfMat[u, 1], perfMat[u, 3], perfMat[u, 2], .98, centroids2, weights, base)

    # Profit from the first batch of work; the baseline uses the unmodified
    # question count for every user and its own pair of coefficients.
    profit_3 = batch_profit(questions[:, COL_3_CLUSTER])
    profit_2 = batch_profit(questions[:, COL_2_CLUSTER])
    profit_norm = batch_profit([base] * n_users, .00045, .0000132)
    print("Profit for 3 Clusters")
    print(profit_3)
    print("Profit for 2 Clusters")
    print(profit_2)
    print("Profit for Base Gating Algorithm")
    print(profit_norm)

    print(" ")
    print(BOLD + "How many questions have been added or subtracted" + RESET)
    print(ALGORITHM_HEADER)
    changeMatrix = questions - base
    print(changeMatrix)
    average = np.mean(changeMatrix, axis=0)
    print(" ")
    print(BOLD + "Average Change in number of tasks before gold" + RESET)
    print(ALGORITHM_HEADER)
    print(average)
    return questions, profit_3, profit_2, profit_norm, average


def main(argv):
    """Run the gating-algorithm test for the data file named in *argv*.

    Usage: ``runner.py <data file> [batch <n>] [run-loop]``.  ``batch <n>``
    selects the batch to evaluate (default: whatever ``pf.chooseBatch``
    picks); ``run-loop`` repeats the clustering LOOP_RUNS times and prints
    aggregate profit and gold figures.
    """
    if len(argv) < 2:
        sys.exit("usage: runner.py <data file> [batch <n>] [run-loop]")

    print("Reading Data...")
    users, batch_score, users_time, batch_time = pf.readData(argv[1])
    # Run calculate_avg_score_per_batch on the global user scores and times.
    global_batch = pf.calculate_avg_score_per_batch(batch_score)
    global_time = pf.calculate_avg_score_per_batch(batch_time)

    # Prep for creation of beta weights.
    print("Working on prepping data for regression weights...")
    regressionData = pf.regressionDataPrep(global_time, global_batch, users, users_time)

    # The batch/project under test is excluded from each user's aggregate
    # score.  It defaults to the batch chosen by pf.chooseBatch but can be set
    # on the command line with the word "batch" followed by the batch number.
    mxBatch = requested_batch(argv)
    if mxBatch is not None and mxBatch not in global_batch:
        print("there are no tasks in this batch, reverting to longest batch")
        mxBatch = None
    if mxBatch is None:
        mxBatch = pf.chooseBatch(batch_score)
    print(BOLD + "Current Batch: " + str(mxBatch) + RESET)

    # Create a user dictionary based on individual users and global scores.
    scores, user_ten = pf.scoresLoop(users, users_time, mxBatch, global_batch, global_time)
    # Matrix with UserID, Past Performance, Time Performance and Current Score per user.
    perfMat = pf.create_perf_arrays(scores)

    # Optionally repeat the run to average the profit function.
    loop = "run-loop" in argv
    run = LOOP_RUNS if loop else 1
    change_sum_2 = 0
    change_sum_3 = 0
    profit_sum2 = 0
    profit_sum3 = 0
    for _ in range(run):
        questions, profit_3, profit_2, profit_norm, average = run_once(perfMat, regressionData)
        # Each accumulator takes the value computed for its own cluster count.
        profit_sum3 += profit_3
        profit_sum2 += profit_2
        change_sum_3 += average[COL_3_CLUSTER]
        change_sum_2 += average[COL_2_CLUSTER]

    # Plot the results of the algorithm.
    # NOTE(review): the source this was recovered from had lost its
    # indentation, so whether the plot ran once per iteration or once after
    # the loop could not be determined; it is drawn once, for the last run.
    # Confirm against the intended behaviour.
    #plottings.score_centroid_distributions(perfMat[:,1], centroids, centroids2)
    #plottings.tenure_to_performance_plot(perfMat[:,1],user_ten, "Tenure", "Scores")
    plottings.scatterOfClusterResults(perfMat[:, 1], perfMat[:, 2], perfMat[:, 3], questions,
                                      'Scores', 'Times', 'Questions before Gold')

    ## For a given project, once implemented the file would run the following function after the
    #questionsCap = [50, 400]
    #secondRoundOutput = pf.secondRound(perfMat[:,1], newScores, questions[:,4:], centroids, centroids2, weights, questionsCap, perfMat)

    # Print aggregates only when the full averaging loop was run.
    if loop:
        avg_change_2 = change_sum_2 / float(run)
        avg_change_3 = change_sum_3 / float(run)
        print("Total Profit from 3 Cluster Algorithm: " + str(profit_sum3))
        print("Total Profit from 2 Cluster Algorithm: " + str(profit_sum2))
        print("Total Profit from Normal: " + str(profit_norm * run))
        print("Average Change before gold 2 centroids")
        print(avg_change_2)
        print("Average Change before gold 3 centroids")
        print(avg_change_3)
        lessGold, goldSavings = gold_savings(avg_change_3)
        print("Less Gold")
        print(lessGold)
        print("Gold Savings")
        print(goldSavings)


if __name__ == "__main__":
    main(sys.argv)