From fdf01090146237a1b74571564fc700046ea1c7a8 Mon Sep 17 00:00:00 2001
From: Julian Maranan <jjm148@canterbury.ac.nz>
Date: Fri, 12 Jan 2024 12:08:04 +1300
Subject: [PATCH 1/4] unit test for instructions_records_to_db

---
 .../test_digitaltwin/data/test_data_to_db.py  |  36 +++++
 .../data/test_get_data_using_geoapis.py       |  55 +++++++
 .../data/test_instructions_records_to_db.py   | 135 ++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 tests/test_digitaltwin/data/test_data_to_db.py
 create mode 100644 tests/test_digitaltwin/data/test_get_data_using_geoapis.py
 create mode 100644 tests/test_digitaltwin/data/test_instructions_records_to_db.py

diff --git a/tests/test_digitaltwin/data/test_data_to_db.py b/tests/test_digitaltwin/data/test_data_to_db.py
new file mode 100644
index 000000000..2dc405cc1
--- /dev/null
+++ b/tests/test_digitaltwin/data/test_data_to_db.py
@@ -0,0 +1,36 @@
+import unittest
+from unittest.mock import MagicMock, patch
+import pandas as pd
+
+from src.digitaltwin.setup_environment import get_database
+from src.digitaltwin.data_to_db import get_nz_geospatial_layers
+
+class TestDataToDB(unittest.TestCase):
+    @classmethod
+    @patch("src.digitaltwin.setup_environment.get_connection_from_profile", autospec=True)
+    def setUpClass(cls, mock_get_connection):
+        # Set up a mock database engine
+        mock_engine = MagicMock()
+
+        # Mock the SQL query result
+        mock_query_result = pd.DataFrame({
+            'column1': [1, 2, 3],
+            'column2': ['A', 'B', 'C']
+            # Add more columns as needed
+        })
+
+        # Configure the mock engine to return the query result
+        mock_engine.execute.return_value.fetchall.return_value = mock_query_result.values
+
+        # Mock the database connection setup function
+        mock_get_connection.return_value = mock_engine
+
+        # Call the function with the mock engine
+        cls.dataframe_output = get_nz_geospatial_layers(mock_engine)
+
+    def test_get_nz_geospatial_layers_correct_frame_type(self):
+        """Test to ensure tabular data is returned in DataFrame format."""
+        self.assertIsInstance(self.dataframe_output, pd.DataFrame)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_digitaltwin/data/test_get_data_using_geoapis.py b/tests/test_digitaltwin/data/test_get_data_using_geoapis.py
new file mode 100644
index 000000000..0df931b1c
--- /dev/null
+++ b/tests/test_digitaltwin/data/test_get_data_using_geoapis.py
@@ -0,0 +1,55 @@
+import unittest
+import geopandas as gpd
+from shapely.geometry import Point
+from unittest.mock import patch, MagicMock
+from src.digitaltwin import get_data_using_geoapis
+
+
+def run(layer_id):
+    # Return a sample GeoDataFrame for testing
+    data = {
+        'Name': ['Feature 1', 'Feature 2'],
+        'Value': [10, 20],
+        'geometry': [Point(1, 3), Point(2, 4)]
+    }
+    return gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326')
+
+
+class TestFetchVectorDataUsingGeoApis(unittest.TestCase):
+
+    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
+    def test_fetch_vector_data_statsnz(self):
+        # Test fetching vector data from StatsNZ
+        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("StatsNZ", 1)
+        self.assertIsInstance(result, gpd.GeoDataFrame)
+        # Add more assertions based on the expected behavior of the function
+
+    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
+    def test_fetch_vector_data_linz(self):
+        # Test fetching vector data from LINZ
+        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LINZ", 2)
+        self.assertIsInstance(result, gpd.GeoDataFrame)
+        # Add more assertions based on the expected behavior of the function
+
+    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
+    def test_fetch_vector_data_lris(self):
+        # Test fetching vector data from LRIS
+        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LRIS", 3)
+        self.assertIsInstance(result, gpd.GeoDataFrame)
+        # Add more assertions based on the expected behavior of the function
+
+    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
+    def test_fetch_vector_data_mfe(self):
+        # Test fetching vector data from MFE
+        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("MFE", 4)
+        self.assertIsInstance(result, gpd.GeoDataFrame)
+        # Add more assertions based on the expected behavior of the function
+
+    def test_fetch_vector_data_unsupported_provider(self):
+        # Test raising ValueError for an unsupported data provider
+        with self.assertRaises(ValueError):
+            get_data_using_geoapis.fetch_vector_data_using_geoapis("UnknownProvider", 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_digitaltwin/data/test_instructions_records_to_db.py b/tests/test_digitaltwin/data/test_instructions_records_to_db.py
new file mode 100644
index 000000000..0fb6e89d3
--- /dev/null
+++ b/tests/test_digitaltwin/data/test_instructions_records_to_db.py
@@ -0,0 +1,135 @@
+import unittest
+from unittest.mock import patch
+import requests
+from src.digitaltwin import instructions_records_to_db
+import pandas as pd
+from pathlib import Path
+import tempfile
+
+class TestInstructionsRecordsToDb(unittest.TestCase):
+    def test_validate_url_reachability_valid_url(self):
+        # Checks if the 'validate_url_reachability' function handles a valid url without raising any exceptions
+        url = "https://www.example.com"
+        section = "test_section"
+        with patch("requests.get") as mock_get:
+            mock_get.return_value.status_code = 200
+            # No exception should be raised
+            instructions_records_to_db.validate_url_reachability(section, url)
+
+    def test_validate_url_reachability_invalid_url(self):
+        # Ensures that the 'validate_url_reachability' function raises a 'ValueError' when provided with an invalid URL
+        url = "not_a_valid_url"
+        section = "test_section"
+        with self.assertRaises(ValueError) as context:
+            instructions_records_to_db.validate_url_reachability(section, url)
+        self.assertIn("Invalid URL provided", str(context.exception))
+
+    def test_validate_url_reachability_reachable_url(self):
+        # Test with a valid and reachable URL
+        url = "https://www.example.com"
+        section = "test_section"
+        try:
+            # Make a real GET request
+            response = requests.get(url)
+            response.raise_for_status()  # Raise an exception if the response status code indicates an error
+            # If the above line doesn't raise an exception, the URL is considered reachable
+
+            # Validate the URL using the function under test
+            instructions_records_to_db.validate_url_reachability(section, url)
+
+        except requests.exceptions.RequestException as e:
+            self.fail(f"Unexpected exception: {e}")
+
+    def test_validate_instruction_fields_valid_coverage_area(self):
+        # Test with a valid instruction providing 'coverage_area'
+        section = "test_section"
+        instruction = {"coverage_area": "Area 51"}
+        # No exception should be raised
+        instructions_records_to_db.validate_instruction_fields(section, instruction)
+
+    def test_validate_instruction_fields_valid_unique_column_name(self):
+        # Test with a valid instruction providing 'unique_column_name'
+        section = "test_section"
+        instruction = {"unique_column_name": "column_name"}
+        # No exception should be raised
+        instructions_records_to_db.validate_instruction_fields(section, instruction)
+
+    def test_validate_instruction_fields_invalid_both_fields_provided(self):
+        # Test with an invalid instruction providing both 'coverage_area' and 'unique_column_name'
+        section = "test_section"
+        instruction = {"coverage_area": "Area 51", "unique_column_name": "column_name"}
+        with self.assertRaises(ValueError) as context:
+            instructions_records_to_db.validate_instruction_fields(section, instruction)
+        self.assertIn("Both 'coverage_area' and 'unique_column_name' provided", str(context.exception))
+
+    def test_validate_instruction_fields_invalid_neither_field_provided(self):
+        # Test with an invalid instruction providing neither 'coverage_area' nor 'unique_column_name'
+        section = "test_section"
+        instruction = {}
+        with self.assertRaises(ValueError) as context:
+            instructions_records_to_db.validate_instruction_fields(section, instruction)
+        self.assertIn("Neither 'coverage_area' nor 'unique_column_name' provided", str(context.exception))
+
+    def test_read_and_check_instructions_file(self):
+        # Create a temporary file with sample data
+        with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
+            temp_file.write('{"section1": {"url": "http://example.com", "other_field": "value"}}')
+            temp_file_path = temp_file.name
+
+        try:
+            # Test the read_and_check_instructions_file function
+            with patch("pathlib.Path", return_value=Path(temp_file_path)):
+                with patch(
+                        "src.digitaltwin.instructions_records_to_db.validate_url_reachability") as mock_validate_url_reachability:
+                    with patch(
+                            "src.digitaltwin.instructions_records_to_db.validate_instruction_fields") as mock_validate_instruction_fields:
+                        result_df = instructions_records_to_db.read_and_check_instructions_file()
+
+                        # Assertions
+                        self.assertIsInstance(result_df, pd.DataFrame)
+                        self.assertEqual(len(result_df), 1)
+                        self.assertSetEqual(set(result_df.columns), {'section', 'url', 'other_field'})
+
+                        # Assert that validate_url_reachability was called with the expected arguments
+                        mock_validate_url_reachability.assert_called_with("section1", "http://example.com")
+
+                        # Assert that validate_instruction_fields was called with the expected arguments
+                        mock_validate_instruction_fields.assert_called_with("section1", {"url": "http://example.com"})
+
+        finally:
+            # Clean up: remove the temporary file
+            Path(temp_file_path).unlink()
+
+    def test_get_non_existing_records(self):
+        # Sample data for instructions_df
+        instructions_data = {'data_provider': ['A', 'B', 'C', 'D'],
+                             'layer_id': [1, 2, 3, 4],
+                             'section': ['S1', 'S2', 'S3', 'S4'],
+                             'url': ['url1', 'url2', 'url3', 'url4']}
+        instructions_df = pd.DataFrame(instructions_data)
+
+        # Sample data for existing_layers_df
+        existing_data = {'data_provider': ['A', 'B', 'C'],
+                         'layer_id': [1, 2, 3],
+                         'section': ['S1', 'S2', 'S3'],
+                         'url': ['url1', 'url2', 'url3']}
+        existing_layers_df = pd.DataFrame(existing_data)
+
+        # Call the function
+        with patch(
+                "src.digitaltwin.instructions_records_to_db.get_non_existing_records") as mock_get_non_existing_records:
+            # Set the return value of the mock to simulate the absence of 'url' column
+            mock_get_non_existing_records.return_value = instructions_df
+
+            result = instructions_records_to_db.get_non_existing_records(instructions_df, existing_layers_df)
+
+            # Assertions
+            self.assertIsInstance(result, pd.DataFrame)
+            self.assertEqual(len(result), len(instructions_df))
+
+            # Assert that get_non_existing_records was called with the expected arguments
+            mock_get_non_existing_records.assert_called_with(instructions_df, existing_layers_df)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 2b8482969cc5d470b6f23042499b4aea62c0a026 Mon Sep 17 00:00:00 2001
From: Luke Parkinson <luke.parkinson@canterbury.ac.nz>
Date: Thu, 11 Jul 2024 09:23:02 +1200
Subject: [PATCH 2/4] Move test files out of test data directory

---
 tests/test_digitaltwin/{data => }/test_data_to_db.py              | 0
 tests/test_digitaltwin/{data => }/test_get_data_using_geoapis.py  | 0
 .../{data => }/test_instructions_records_to_db.py                 | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/test_digitaltwin/{data => }/test_data_to_db.py (100%)
 rename tests/test_digitaltwin/{data => }/test_get_data_using_geoapis.py (100%)
 rename tests/test_digitaltwin/{data => }/test_instructions_records_to_db.py (100%)

diff --git a/tests/test_digitaltwin/data/test_data_to_db.py b/tests/test_digitaltwin/test_data_to_db.py
similarity index 100%
rename from tests/test_digitaltwin/data/test_data_to_db.py
rename to tests/test_digitaltwin/test_data_to_db.py
diff --git a/tests/test_digitaltwin/data/test_get_data_using_geoapis.py b/tests/test_digitaltwin/test_get_data_using_geoapis.py
similarity index 100%
rename from tests/test_digitaltwin/data/test_get_data_using_geoapis.py
rename to tests/test_digitaltwin/test_get_data_using_geoapis.py
diff --git a/tests/test_digitaltwin/data/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py
similarity index 100%
rename from tests/test_digitaltwin/data/test_instructions_records_to_db.py
rename to tests/test_digitaltwin/test_instructions_records_to_db.py

From e095ab7838a03ef7fdb910293eda69eb3da8f959 Mon Sep 17 00:00:00 2001
From: Luke Parkinson <luke.parkinson@canterbury.ac.nz>
Date: Fri, 19 Jul 2024 13:30:34 +1200
Subject: [PATCH 3/4] Remove tests that mostly test mocked objects without
 implementation

---
 tests/test_digitaltwin/test_data_to_db.py     | 36 ----------------
 .../test_get_data_using_geoapis.py            | 42 +------------------
 .../test_instructions_records_to_db.py        | 39 +++--------------
 3 files changed, 7 insertions(+), 110 deletions(-)
 delete mode 100644 tests/test_digitaltwin/test_data_to_db.py

diff --git a/tests/test_digitaltwin/test_data_to_db.py b/tests/test_digitaltwin/test_data_to_db.py
deleted file mode 100644
index 2dc405cc1..000000000
--- a/tests/test_digitaltwin/test_data_to_db.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import unittest
-from unittest.mock import MagicMock, patch
-import pandas as pd
-
-from src.digitaltwin.setup_environment import get_database
-from src.digitaltwin.data_to_db import get_nz_geospatial_layers
-
-class TestDataToDB(unittest.TestCase):
-    @classmethod
-    @patch("src.digitaltwin.setup_environment.get_connection_from_profile", autospec=True)
-    def setUpClass(cls, mock_get_connection):
-        # Set up a mock database engine
-        mock_engine = MagicMock()
-
-        # Mock the SQL query result
-        mock_query_result = pd.DataFrame({
-            'column1': [1, 2, 3],
-            'column2': ['A', 'B', 'C']
-            # Add more columns as needed
-        })
-
-        # Configure the mock engine to return the query result
-        mock_engine.execute.return_value.fetchall.return_value = mock_query_result.values
-
-        # Mock the database connection setup function
-        mock_get_connection.return_value = mock_engine
-
-        # Call the function with the mock engine
-        cls.dataframe_output = get_nz_geospatial_layers(mock_engine)
-
-    def test_get_nz_geospatial_layers_correct_frame_type(self):
-        """Test to ensure tabular data is returned in DataFrame format."""
-        self.assertIsInstance(self.dataframe_output, pd.DataFrame)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/test_digitaltwin/test_get_data_using_geoapis.py b/tests/test_digitaltwin/test_get_data_using_geoapis.py
index 0df931b1c..481bb9365 100644
--- a/tests/test_digitaltwin/test_get_data_using_geoapis.py
+++ b/tests/test_digitaltwin/test_get_data_using_geoapis.py
@@ -1,50 +1,10 @@
 import unittest
-import geopandas as gpd
-from shapely.geometry import Point
-from unittest.mock import patch, MagicMock
-from src.digitaltwin import get_data_using_geoapis
-
 
-def run(layer_id):
-    # Return a sample GeoDataFrame for testing
-    data = {
-        'Name': ['Feature 1', 'Feature 2'],
-        'Value': [10, 20],
-        'geometry': [Point(1, 3), Point(2, 4)]
-    }
-    return gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326')
+from src.digitaltwin import get_data_using_geoapis
 
 
 class TestFetchVectorDataUsingGeoApis(unittest.TestCase):
 
-    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
-    def test_fetch_vector_data_statsnz(self):
-        # Test fetching vector data from StatsNZ
-        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("StatsNZ", 1)
-        self.assertIsInstance(result, gpd.GeoDataFrame)
-        # Add more assertions based on the expected behavior of the function
-
-    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
-    def test_fetch_vector_data_linz(self):
-        # Test fetching vector data from LINZ
-        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LINZ", 2)
-        self.assertIsInstance(result, gpd.GeoDataFrame)
-        # Add more assertions based on the expected behavior of the function
-
-    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
-    def test_fetch_vector_data_lris(self):
-        # Test fetching vector data from LRIS
-        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LRIS", 3)
-        self.assertIsInstance(result, gpd.GeoDataFrame)
-        # Add more assertions based on the expected behavior of the function
-
-    @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key'))
-    def test_fetch_vector_data_mfe(self):
-        # Test fetching vector data from MFE
-        result = get_data_using_geoapis.fetch_vector_data_using_geoapis("MFE", 4)
-        self.assertIsInstance(result, gpd.GeoDataFrame)
-        # Add more assertions based on the expected behavior of the function
-
     def test_fetch_vector_data_unsupported_provider(self):
         # Test raising ValueError for an unsupported data provider
         with self.assertRaises(ValueError):
diff --git a/tests/test_digitaltwin/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py
index 0fb6e89d3..0f27fc4c2 100644
--- a/tests/test_digitaltwin/test_instructions_records_to_db.py
+++ b/tests/test_digitaltwin/test_instructions_records_to_db.py
@@ -1,10 +1,13 @@
+import tempfile
 import unittest
+from pathlib import Path
 from unittest.mock import patch
+
+import pandas as pd
 import requests
+
 from src.digitaltwin import instructions_records_to_db
-import pandas as pd
-from pathlib import Path
-import tempfile
+
 
 class TestInstructionsRecordsToDb(unittest.TestCase):
     def test_validate_url_reachability_valid_url(self):
@@ -100,36 +103,6 @@ def test_read_and_check_instructions_file(self):
             # Clean up: remove the temporary file
             Path(temp_file_path).unlink()
 
-    def test_get_non_existing_records(self):
-        # Sample data for instructions_df
-        instructions_data = {'data_provider': ['A', 'B', 'C', 'D'],
-                             'layer_id': [1, 2, 3, 4],
-                             'section': ['S1', 'S2', 'S3', 'S4'],
-                             'url': ['url1', 'url2', 'url3', 'url4']}
-        instructions_df = pd.DataFrame(instructions_data)
-
-        # Sample data for existing_layers_df
-        existing_data = {'data_provider': ['A', 'B', 'C'],
-                         'layer_id': [1, 2, 3],
-                         'section': ['S1', 'S2', 'S3'],
-                         'url': ['url1', 'url2', 'url3']}
-        existing_layers_df = pd.DataFrame(existing_data)
-
-        # Call the function
-        with patch(
-                "src.digitaltwin.instructions_records_to_db.get_non_existing_records") as mock_get_non_existing_records:
-            # Set the return value of the mock to simulate the absence of 'url' column
-            mock_get_non_existing_records.return_value = instructions_df
-
-            result = instructions_records_to_db.get_non_existing_records(instructions_df, existing_layers_df)
-
-            # Assertions
-            self.assertIsInstance(result, pd.DataFrame)
-            self.assertEqual(len(result), len(instructions_df))
-
-            # Assert that get_non_existing_records was called with the expected arguments
-            mock_get_non_existing_records.assert_called_with(instructions_df, existing_layers_df)
-
 
 if __name__ == '__main__':
     unittest.main()

From ba1a9e6e06f3c38c1169783928d7d976fe0316c5 Mon Sep 17 00:00:00 2001
From: Luke Parkinson <luke.parkinson@canterbury.ac.nz>
Date: Fri, 19 Jul 2024 13:30:57 +1200
Subject: [PATCH 4/4] Fix broken test

---
 tests/test_digitaltwin/test_instructions_records_to_db.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_digitaltwin/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py
index 0f27fc4c2..40ace1964 100644
--- a/tests/test_digitaltwin/test_instructions_records_to_db.py
+++ b/tests/test_digitaltwin/test_instructions_records_to_db.py
@@ -76,7 +76,7 @@ def test_validate_instruction_fields_invalid_neither_field_provided(self):
     def test_read_and_check_instructions_file(self):
         # Create a temporary file with sample data
         with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
-            temp_file.write('{"section1": {"url": "http://example.com", "other_field": "value"}}')
+            temp_file.write('{"section1": {"url": "https://example.com", "other_field": "value"}}')
             temp_file_path = temp_file.name
 
         try:
@@ -94,10 +94,11 @@ def test_read_and_check_instructions_file(self):
                         self.assertSetEqual(set(result_df.columns), {'section', 'url', 'other_field'})
 
                         # Assert that validate_url_reachability was called with the expected arguments
-                        mock_validate_url_reachability.assert_called_with("section1", "http://example.com")
+                        mock_validate_url_reachability.assert_called_with("section1", "https://example.com")
 
                         # Assert that validate_instruction_fields was called with the expected arguments
-                        mock_validate_instruction_fields.assert_called_with("section1", {"url": "http://example.com"})
+                        mock_validate_instruction_fields.assert_called_with("section1", {"url": "https://example.com",
+                                                                                         'other_field': 'value'})
 
         finally:
             # Clean up: remove the temporary file