From 73102106b556b7012119e42acff693a10a70b5cb Mon Sep 17 00:00:00 2001
From: khoivan88 <33493502+khoivan88@users.noreply.github.com>
Date: Mon, 16 Mar 2020 15:45:46 -0400
Subject: [PATCH 1/2] Suggest alternative for finding the number of columns;
 fix other typos and linting issues

---
 _episodes/04-tabular_data.md | 72 ++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 31 deletions(-)

diff --git a/_episodes/04-tabular_data.md b/_episodes/04-tabular_data.md
index 5c81f62..ec37c96 100644
--- a/_episodes/04-tabular_data.md
+++ b/_episodes/04-tabular_data.md
@@ -253,6 +253,13 @@ In our example, the "end" value needs to be the number of columns of data.
 >> ~~~
 >> {: .language-python}
 >>
+>> Alternatively:
+>> ~~~
+>> num_rows, num_columns = data.shape  # data.shape is (10000, 5): the number of rows and the number of columns, respectively
+>> print(num_columns)
+>> ~~~
+>> {: .language-python}
+>>
 >> ~~~
 >> 5
 >> ~~~
@@ -263,9 +270,9 @@ In our example, the "end" value needs to be the number of columns of data.
 Now that we know the number of columns, we can use the `range()` function to set up our `for` loop.
 ```
 for i in range(1,num_columns):
-    column = data[:,i]
+    column = data[:, i]
     avg_col = numpy.mean(column)
-    print(F'{headers[i]} : {avg_col}')
+    print(f'{headers[i]} : {avg_col}')
 ```
 {: .language-python}
 ```
@@ -308,18 +315,17 @@ TYR6_ASP : 10.9934435
 >>
 >> file_location = os.path.join('data', 'water.xyz')
 >> xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2,  dtype='unicode')
->> symbols = xyz_file[:,0]
->> coordinates = (xyz_file[:,1:])
+>> symbols = xyz_file[:, 0]
+>> coordinates = xyz_file[:, 1:]
 >> coordinates = coordinates.astype(numpy.float)
 >> num_atoms = len(symbols)
->> BLfile = open('bond_lengths.txt','w+')   #Open a file for writing
->> for num1 in range(0,num_atoms):
->>     for num2 in range(0,num_atoms):
->>         x_distance = coordinates[num1,0] - coordinates[num2,0]
->>         y_distance = coordinates[num1,1] - coordinates[num2,1]
->>         z_distance = coordinates[num1,2] - coordinates[num2,2]
->>         bond_length_12 = numpy.sqrt(x_distance**2+y_distance**2+z_distance**2)
->>         print(F'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')  
+>> for num1 in range(0, num_atoms):
+>>     for num2 in range(0, num_atoms):
+>>         x_distance = coordinates[num1, 0] - coordinates[num2, 0]
+>>         y_distance = coordinates[num1, 1] - coordinates[num2, 1]
+>>         z_distance = coordinates[num1, 2] - coordinates[num2, 2]
+>>         bond_length_12 = numpy.sqrt(x_distance ** 2 + y_distance ** 2 + z_distance ** 2)
+>>         print(f'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')  # print result to screen
 >>
 >> ~~~
 >> {: .language-python}
@@ -328,9 +334,11 @@ TYR6_ASP : 10.9934435
 >> ~~~
 >> import numpy
 >> import os
+>>
 >> file_location = os.path.join('data', 'water.xyz')
 >> xyzfile = open(file_location,"r")
->> data=xyzfile.readlines()
+>> data = xyzfile.readlines()
+>> xyzfile.close()
 >> num_atoms = int(data[0])
 >> data = data[2:]
 >>
@@ -341,18 +349,18 @@ TYR6_ASP : 10.9934435
 >>     atom_data = atom.split()
 >>     symbol = atom_data[0]
 >>     symbols.append(symbol)
->>     x, y, z = atom_data[1], atom_data[2], atom_data[3]
+>>     x, y, z = atom_data[1:]    # or `x, y, z = atom_data[1], atom_data[2], atom_data[3]`
 >>     coordinates.append([float(x), float(y), float(z)])
 >>
->> for num1 in range(0,num_atoms):
->>     for num2 in range(0,num_atoms):
+>> for num1 in range(0, num_atoms):
+>>     for num2 in range(0, num_atoms):
 >>         atom1 = coordinates[num1]
 >>         atom2 = coordinates[num2]
 >>         x_distance = atom1[0] - atom2[0]
 >>         y_distance = atom1[1] - atom2[1]
 >>         z_distance = atom1[2] - atom2[2]
->>         bond_length_12 = numpy.sqrt(x_distance**2+y_distance**2+z_distance**2)
->>         print(F'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')
+>>         bond_length_12 = numpy.sqrt(x_distance ** 2 + y_distance ** 2 + z_distance ** 2)
+>>         print(f'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')
 >> ~~~
 >> {: .language-python}
 > {: .solution}
@@ -361,7 +369,7 @@ TYR6_ASP : 10.9934435
 
 > ## Variable Names
 >
-> In our solution above, we called our bond length variable `bond_length_AB`. We could have called this variable anything we wanted. Consider the following two potential variable names for bond length - `BL_AB` and `bond_length_AB`. Which is more clear to you? While you might know what `BL` means, and it is possible for others to figure it out through context, it's easier on others if you give your variables clear names.
+> In our solution above, we called our bond length variable `bond_length_12`. We could have called this variable anything we wanted. Consider the following two potential variable names for bond length - `BL_12` and `bond_length_12`. Which is more clear to you? While you might know what `BL` means, and it is possible for others to figure it out through context, it's easier on others if you give your variables clear names.
 {: .callout}
 
 > ## Project Extension 1
@@ -373,7 +381,7 @@ TYR6_ASP : 10.9934435
 >> Add an `if` statement before your print statement.
 >> ~~~
 >> if bond_length_12 > 0 and bond_length_12 <= 1.5:
->>         print(F'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')  
+>>     print(f'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')  
 >> ~~~
 >> {: .language-python}
 >>
@@ -400,19 +408,19 @@ TYR6_ASP : 10.9934435
 >>
 >> file_location = os.path.join('data', 'water.xyz')
 >> xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2, dtype='unicode')
->> symbols = xyz_file[:,0]
->> coordinates = (xyz_file[:,1:])
+>> symbols = xyz_file[:, 0]
+>> coordinates = xyz_file[:, 1:]
 >> coordinates = coordinates.astype(numpy.float)
 >> num_atoms = len(symbols)
->> for num1 in range(0,num_atoms):
->>     for num2 in range(0,num_atoms):
->>         if num1<num2:
->>             x_distance = coordinates[num1,0] - coordinates[num2,0]
->>             y_distance = coordinates[num1,1] - coordinates[num2,1]
->>             z_distance = coordinates[num1,2] - coordinates[num2,2]
->>             bond_length_12 = numpy.sqrt(x_distance**2+y_distance**2+z_distance**2)
+>> for num1 in range(0, num_atoms):
+>>     for num2 in range(0, num_atoms):
+>>         if num1 < num2:
+>>             x_distance = coordinates[num1, 0] - coordinates[num2, 0]
+>>             y_distance = coordinates[num1, 1] - coordinates[num2, 1]
+>>             z_distance = coordinates[num1, 2] - coordinates[num2, 2]
+>>             bond_length_12 = numpy.sqrt(x_distance ** 2 + y_distance ** 2 + z_distance ** 2)
 >>             if bond_length_12 > 0 and bond_length_12 <= 1.5:
->>                 print(F'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')
+>>                 print(f'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')
 >> ~~~
 >> {: .language-python}
 >>
@@ -438,10 +446,12 @@ TYR6_ASP : 10.9934435
 >>
 >> Then inside your loop change your `print` statement to a `BLfile.write()` statement.
 >> ~~~
->> BLfile.write(F'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}')
+>> BLfile.write(f'{symbols[num1]} to {symbols[num2]} : {bond_length_12:.3f}\n')
 >> ~~~
 >> {: .language-python}
 >>
+>> Note: `write()` does not add a newline character automatically, so `\n` must be included at the end of each line you write.
+>>
 >> Don't forget to close the file at the end of your code.
 >> ~~~
 >> BLfile.close()      #Close the file so it will write

From 61336446583e58559aeae434deae336b11b453e1 Mon Sep 17 00:00:00 2001
From: khoivan88 <33493502+khoivan88@users.noreply.github.com>
Date: Thu, 26 Mar 2020 15:16:22 -0400
Subject: [PATCH 2/2] Revert back to original document

I think my suggestion is too ambiguous and probably causes confusion for beginners
---
 _episodes/04-tabular_data.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_episodes/04-tabular_data.md b/_episodes/04-tabular_data.md
index ec37c96..a2f6334 100644
--- a/_episodes/04-tabular_data.md
+++ b/_episodes/04-tabular_data.md
@@ -269,7 +269,7 @@ In our example, the "end" value needs to be the number of columns of data.
 
 Now that we know the number of columns, we can use the `range()` function to set up our `for` loop.
 ```
-for i in range(1,num_columns):
+for i in range(1, num_columns):
     column = data[:, i]
     avg_col = numpy.mean(column)
     print(f'{headers[i]} : {avg_col}')
@@ -349,7 +349,7 @@ TYR6_ASP : 10.9934435
 >>     atom_data = atom.split()
 >>     symbol = atom_data[0]
 >>     symbols.append(symbol)
->>     x, y, z = atom_data[1:]    # or `x, y, z = atom_data[1], atom_data[2], atom_data[3]`
+>>     x, y, z = atom_data[1], atom_data[2], atom_data[3]
 >>     coordinates.append([float(x), float(y), float(z)])
 >>
 >> for num1 in range(0, num_atoms):