diff --git a/dig.py b/dig.py
index bcc6924..a1917a7 100644
--- a/dig.py
+++ b/dig.py
@@ -2,7 +2,7 @@
 import pandas as pd
 from matplotlib import pyplot as plt
 
-data = pd.read_csv('data/train.csv')
+data = pd.read_csv('train.csv')
 
 def init_params():
     W1 = np.random.rand(10, 784) - 0.5
@@ -20,7 +20,7 @@ def softmax(Z):
     return A
 
 def one_hot(Y):
-    one_hot_Y = np.ones((Y.size, Y.max() + 1))
+    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
     one_hot_Y[np.arange(Y.size), Y] = 1
     one_hot_Y = one_hot_Y.T
     return one_hot_Y
@@ -43,7 +43,7 @@ def one_hot(Y):
 
 def forward_prop(W1, b1, W2, b2, X):
     Z1 = W1.dot(X) + b1
-    A1 = Z1
+    A1 = ReLU(Z1)
     Z2 = W2.dot(A1) + b2
     A2 = softmax(Z2)
     return Z1, A1, Z2, A2
@@ -56,11 +56,11 @@ def ReLU_deriv(Z):
 
 def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
     one_hot_Y = one_hot(Y)
     dZ2 = A2 - one_hot_Y
-    dW2 = dZ2.dot(A1.T)
-    db2 = np.sum(dZ2)
+    dW2 = 1 / m * dZ2.dot(A1.T)
+    db2 = 1 / m * np.sum(dZ2)
     dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
-    dW1 = dZ1.dot(X.T)
-    db1 = np.sum(dZ1)
+    dW1 = 1 / m * dZ1.dot(X.T)
+    db1 = 1 / m * np.sum(dZ1)
     return dW1, db1, dW2, db2
@@ -91,7 +91,7 @@ def gradient_descent(X, Y, alpha, iterations):
     return W1, b1, W2, b2
 
-W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.001, 50)
+W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.10, 500)
@@ -108,7 +108,7 @@ def test_prediction(index, W1, b1, W2, b2):
     print("Label: ", label)
     current_image = current_image.reshape((28, 28)) * 255
-    # plt.gray()
+    # plt.gray()
     plt.imshow(current_image, interpolation='nearest')
     plt.show()
diff --git a/pca.py b/pca.py
index 52f6b40..b02ccae 100644
--- a/pca.py
+++ b/pca.py
@@ -3,7 +3,7 @@
 from matplotlib import pyplot as plt
 # from mpl_toolkits.mplot3d import Axes3D
 
-dataset = pd.read_csv("data/diabetes.csv")
+dataset = pd.read_csv("diabetes.csv")
 
 # print(dataset.head())
 # print(dataset.describe())
@@ -12,7 +12,7 @@
 y = dataset.iloc[:,8]
 
 # Standardize feature space mean 0 and variance 1
-X_std = (X+np.mean(X,axis = 0))/np.std(X,axis = 0)
+X_std = (X-np.mean(X,axis = 0))/np.std(X,axis = 0)
@@ -33,11 +33,9 @@
 # Set of (eigenvalue, eigenvector) pairs
 eig_pairs = [(eigenvalues[index], eigenvectors[:,index]) for index in range(len(eigenvalues))]
-
 # Descending sort (eigenvalue, eigenvector) pairs with respect to eigenvalue
-
-
-
+eig_pairs.sort()
+eig_pairs.reverse()
 eigvalues_sort = [eig_pairs[index][0] for index in range(len(eigenvalues))]
 eigvectors_sort = [eig_pairs[index][1] for index in range(len(eigenvalues))]
@@ -71,7 +69,7 @@
 # Keep the first two principal components
 # P_reduce is 8 x 2 matrix
-P_reduce = np.array(eigvectors_sort[0:2]).T
+P_reduce = np.array(eigvectors_sort[0:2]).transpose()
 
 # The projected data in 2D will be n x 2 matrix
 Proj_data_2D = np.dot(X_std,P_reduce)
@@ -113,10 +111,10 @@
 ax = fig.add_subplot(111, projection='3d')
 
 # Scatter plot in 3D (test negative for diabetes)
-negative = ax.scatter(Proj_data_3D[:0,][y == 0], Proj_data_3D[:1,][y == 0], Proj_data_3D[:,2][y == 0], label="No Diabetes")
+negative = ax.scatter(Proj_data_3D[:,0][y == 0], Proj_data_3D[:,1][y == 0], Proj_data_3D[:,2][y == 0], label="No Diabetes")
 
 # Scatter plot in 3D (test positive for diabetes)
-positive = ax.scatter(Proj_data_3D[:0,][y == 0], Proj_data_3D[:1,][y == 0], Proj_data_3D[:,2][y == 1], color="red", label="Have Diabetes")
+positive = ax.scatter(Proj_data_3D[:,0][y == 1], Proj_data_3D[:,1][y == 1], Proj_data_3D[:,2][y == 1], color="red", label="Have Diabetes")
 
 ax.set_title('PCA Reduces Data to 3D')
@@ -132,18 +130,3 @@
 ax.legend()
 plt.show()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
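The pca.py changes above (subtracting the mean during standardization, sorting the eigenpairs in descending eigenvalue order, and transposing the reduced eigenvector matrix) can be sanity-checked in isolation. Below is a minimal sketch of that corrected pipeline on synthetic data; the random 100 x 8 matrix, the use of np.linalg.eigh, and the sort key are illustrative assumptions and are not part of the patch.

```python
import numpy as np

# Synthetic stand-in for the 8 diabetes features: 100 samples, 8 columns.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 8))

# Standardize: subtract the mean (the fix in the patch), then divide by the std.
X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Eigendecomposition of the covariance matrix (eigh since it is symmetric).
cov = np.cov(X_std.T)
eigenvalues, eigenvectors = np.linalg.eigh(cov)

# Pair each eigenvalue with its eigenvector and sort descending by eigenvalue.
# The patch's eig_pairs.sort(); eig_pairs.reverse() is equivalent when the
# eigenvalues are distinct; an explicit key avoids comparing the arrays.
eig_pairs = [(eigenvalues[i], eigenvectors[:, i]) for i in range(len(eigenvalues))]
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Keep the top two components (P_reduce is 8 x 2) and project to n x 2.
P_reduce = np.array([pair[1] for pair in eig_pairs[:2]]).T
Proj_data_2D = X_std.dot(P_reduce)
print(Proj_data_2D.shape)  # (100, 2)
```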
diff --git a/xor.py b/xor.py
index 61eb149..d30cd7e 100644
--- a/xor.py
+++ b/xor.py
@@ -3,7 +3,7 @@
 # XOR Gate #
 
 def sig(x):
-    return 1 / (1 + np.exp(x))
+    return 1 / (1 + np.exp(-x))
 
 def sigDeriv(x):
     return x * (1 - x)
@@ -21,7 +21,7 @@ def sigDeriv(x):
 outputBias = np.zeros((1, 1), dtype = float)
 
 epochs = 50000
-lRate = 1
+lRate = 0.1
 
 for _ in range(epochs):
@@ -36,19 +36,20 @@ def sigDeriv(x):
     predictedOutput = sig(outputLayerActivation)
 
     # back prop
-    error = target
-    dPredictedOutput = error
+    error = target - predictedOutput
+    dPredictedOutput = error * sigDeriv(predictedOutput)
 
     errorHiddenLayer = dPredictedOutput.dot(outputWeights.T)
     dHiddenLayer = errorHiddenLayer * sigDeriv(hiddenLayerOutput)
 
+    # updating weights, bias
     outputWeights += hiddenLayerOutput.T.dot(dPredictedOutput) * lRate
-    outputBias += np.sum(dPredictedOutput, axis = 0, keepdims=True)
-    hiddenWeights += input.T.dot(dHiddenLayer)
+    outputBias += np.sum(dPredictedOutput, axis = 0, keepdims=True) * lRate
+    hiddenWeights += input.T.dot(dHiddenLayer) * lRate
    hiddenBias += np.sum(dHiddenLayer, axis = 0, keepdims=True) * lRate
-
+
 print("Final hidden weights: ", end = '')
 print(*hiddenWeights)
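As a quick check of the xor.py fixes (the negated exponent in sig, the error = target - predictedOutput delta passed through sigDeriv, and the learning rate applied to every weight and bias update), here is a self-contained sketch of the corrected training loop. The 2-unit hidden layer, the uniform weight initialization, and the variable shapes are assumptions; only the update rules mirror the patch, and the array is named inputs rather than input to avoid shadowing the builtin.

```python
import numpy as np

def sig(x):
    # Corrected sigmoid: note the negative sign in the exponent.
    return 1 / (1 + np.exp(-x))

def sigDeriv(x):
    # Derivative written in terms of the sigmoid's output.
    return x * (1 - x)

rng = np.random.default_rng(1)
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
target = np.array([[0], [1], [1], [0]], dtype=float)

hiddenWeights = rng.uniform(size=(2, 2))   # assumed 2 hidden units
hiddenBias = np.zeros((1, 2))
outputWeights = rng.uniform(size=(2, 1))
outputBias = np.zeros((1, 1))

epochs = 50000
lRate = 0.1

for _ in range(epochs):
    # forward pass
    hiddenLayerOutput = sig(inputs.dot(hiddenWeights) + hiddenBias)
    predictedOutput = sig(hiddenLayerOutput.dot(outputWeights) + outputBias)

    # back prop: error first, then the delta through the sigmoid derivative
    error = target - predictedOutput
    dPredictedOutput = error * sigDeriv(predictedOutput)
    dHiddenLayer = dPredictedOutput.dot(outputWeights.T) * sigDeriv(hiddenLayerOutput)

    # every update is scaled by the learning rate, as in the patch
    outputWeights += hiddenLayerOutput.T.dot(dPredictedOutput) * lRate
    outputBias += np.sum(dPredictedOutput, axis=0, keepdims=True) * lRate
    hiddenWeights += inputs.T.dot(dHiddenLayer) * lRate
    hiddenBias += np.sum(dHiddenLayer, axis=0, keepdims=True) * lRate

# After training, the outputs should be close to [[0], [1], [1], [0]].
print(np.round(predictedOutput, 3))
```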