From fdf01090146237a1b74571564fc700046ea1c7a8 Mon Sep 17 00:00:00 2001 From: Julian Maranan Date: Fri, 12 Jan 2024 12:08:04 +1300 Subject: [PATCH 1/4] unit test for instructions_records_to_db --- .../test_digitaltwin/data/test_data_to_db.py | 36 +++++ .../data/test_get_data_using_geoapis.py | 55 +++++++ .../data/test_instructions_records_to_db.py | 135 ++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 tests/test_digitaltwin/data/test_data_to_db.py create mode 100644 tests/test_digitaltwin/data/test_get_data_using_geoapis.py create mode 100644 tests/test_digitaltwin/data/test_instructions_records_to_db.py diff --git a/tests/test_digitaltwin/data/test_data_to_db.py b/tests/test_digitaltwin/data/test_data_to_db.py new file mode 100644 index 000000000..2dc405cc1 --- /dev/null +++ b/tests/test_digitaltwin/data/test_data_to_db.py @@ -0,0 +1,36 @@ +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd + +from src.digitaltwin.setup_environment import get_database +from src.digitaltwin.data_to_db import get_nz_geospatial_layers + +class TestDataToDB(unittest.TestCase): + @classmethod + @patch("src.digitaltwin.setup_environment.get_connection_from_profile", autospec=True) + def setUpClass(cls, mock_get_connection): + # Set up a mock database engine + mock_engine = MagicMock() + + # Mock the SQL query result + mock_query_result = pd.DataFrame({ + 'column1': [1, 2, 3], + 'column2': ['A', 'B', 'C'] + # Add more columns as needed + }) + + # Configure the mock engine to return the query result + mock_engine.execute.return_value.fetchall.return_value = mock_query_result.values + + # Mock the database connection setup function + mock_get_connection.return_value = mock_engine + + # Call the function with the mock engine + cls.dataframe_output = get_nz_geospatial_layers(mock_engine) + + def test_get_nz_geospatial_layers_correct_frame_type(self): + """Test to ensure tabular data is returned in DataFrame format.""" + self.assertIsInstance(self.dataframe_output, pd.DataFrame) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_digitaltwin/data/test_get_data_using_geoapis.py b/tests/test_digitaltwin/data/test_get_data_using_geoapis.py new file mode 100644 index 000000000..0df931b1c --- /dev/null +++ b/tests/test_digitaltwin/data/test_get_data_using_geoapis.py @@ -0,0 +1,55 @@ +import unittest +import geopandas as gpd +from shapely.geometry import Point +from unittest.mock import patch, MagicMock +from src.digitaltwin import get_data_using_geoapis + + +def run(layer_id): + # Return a sample GeoDataFrame for testing + data = { + 'Name': ['Feature 1', 'Feature 2'], + 'Value': [10, 20], + 'geometry': [Point(1, 3), Point(2, 4)] + } + return gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326') + + +class TestFetchVectorDataUsingGeoApis(unittest.TestCase): + + @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) + def test_fetch_vector_data_statsnz(self): + # Test fetching vector data from StatsNZ + result = get_data_using_geoapis.fetch_vector_data_using_geoapis("StatsNZ", 1) + self.assertIsInstance(result, gpd.GeoDataFrame) + # Add more assertions based on the expected behavior of the function + + @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) + def test_fetch_vector_data_linz(self): + # Test fetching vector data from LINZ + result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LINZ", 2) + self.assertIsInstance(result, gpd.GeoDataFrame) + # Add more assertions based on the expected behavior of the function + + @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) + def test_fetch_vector_data_lris(self): + # Test fetching vector data from LRIS + result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LRIS", 3) + self.assertIsInstance(result, gpd.GeoDataFrame) + # Add more assertions based on the expected behavior of the function + + @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) + def test_fetch_vector_data_mfe(self): + # Test fetching vector data from MFE + result = get_data_using_geoapis.fetch_vector_data_using_geoapis("MFE", 4) + self.assertIsInstance(result, gpd.GeoDataFrame) + # Add more assertions based on the expected behavior of the function + + def test_fetch_vector_data_unsupported_provider(self): + # Test raising ValueError for an unsupported data provider + with self.assertRaises(ValueError): + get_data_using_geoapis.fetch_vector_data_using_geoapis("UnknownProvider", 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_digitaltwin/data/test_instructions_records_to_db.py b/tests/test_digitaltwin/data/test_instructions_records_to_db.py new file mode 100644 index 000000000..0fb6e89d3 --- /dev/null +++ b/tests/test_digitaltwin/data/test_instructions_records_to_db.py @@ -0,0 +1,135 @@ +import unittest +from unittest.mock import patch +import requests +from src.digitaltwin import instructions_records_to_db +import pandas as pd +from pathlib import Path +import tempfile + +class TestInstructionsRecordsToDb(unittest.TestCase): + def test_validate_url_reachability_valid_url(self): + # Checks if the 'validate_url_reachability' function handles a valid url without raising any exceptions + url = "https://www.example.com" + section = "test_section" + with patch("requests.get") as mock_get: + mock_get.return_value.status_code = 200 + # No exception should be raised + instructions_records_to_db.validate_url_reachability(section, url) + + def test_validate_url_reachability_invalid_url(self): + # Ensures that the 'validate_url_reachability' function raises A 'ValueError' when provided with an invalid url + url = "not_a_valid_url" + section = "test_section" + with self.assertRaises(ValueError) as context: + instructions_records_to_db.validate_url_reachability(section, url) + self.assertIn("Invalid URL provided", str(context.exception)) + + def test_validate_url_reachability_reachable_url(self): + # Test with a valid and reachable URL + url = "https://www.example.com" + section = "test_section" + try: + # Make a real GET request + response = requests.get(url) + response.raise_for_status() # Raise an exception if the response status code indicates an error + # If the above line doesn't raise an exception, the URL is considered reachable + + # Validate the URL using your function + instructions_records_to_db.validate_url_reachability(section, url) + + except requests.exceptions.RequestException as e: + self.fail(f"Unexpected exception: {e}") + + def test_validate_instruction_fields_valid_coverage_area(self): + # Test with a valid instruction providing 'coverage_area' + section = "test_section" + instruction = {"coverage_area": "Area 51"} + # No exception should be raised + instructions_records_to_db.validate_instruction_fields(section, instruction) + + def test_validate_instruction_fields_valid_unique_column_name(self): + # Test with a valid instruction providing 'unique_column_name' + section = "test_section" + instruction = {"unique_column_name": "column_name"} + # No exception should be raised + instructions_records_to_db.validate_instruction_fields(section, instruction) + + def test_validate_instruction_fields_invalid_both_fields_provided(self): + # Test with an invalid instruction providing both 'coverage_area' and 'unique_column_name' + section = "test_section" + instruction = {"coverage_area": "Area 51", "unique_column_name": "column_name"} + with self.assertRaises(ValueError) as context: + instructions_records_to_db.validate_instruction_fields(section, instruction) + self.assertIn("Both 'coverage_area' and 'unique_column_name' provided", str(context.exception)) + + def test_validate_instruction_fields_invalid_neither_field_provided(self): + # Test with an invalid instruction providing neither 'coverage_area' nor 'unique_column_name' + section = "test_section" + instruction = {} + with self.assertRaises(ValueError) as context: + instructions_records_to_db.validate_instruction_fields(section, instruction) + self.assertIn("Neither 'coverage_area' nor 'unique_column_name' provided", str(context.exception)) + + def test_read_and_check_instructions_file(self): + # Create a temporary file with sample data + with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file: + temp_file.write('{"section1": {"url": "http://example.com", "other_field": "value"}}') + temp_file_path = temp_file.name + + try: + # Test the read_and_check_instructions_file function + with patch("pathlib.Path", return_value=Path(temp_file_path)): + with patch( + "src.digitaltwin.instructions_records_to_db.validate_url_reachability") as mock_validate_url_reachability: + with patch( + "src.digitaltwin.instructions_records_to_db.validate_instruction_fields") as mock_validate_instruction_fields: + result_df = instructions_records_to_db.read_and_check_instructions_file() + + # Assertions + self.assertIsInstance(result_df, pd.DataFrame) + self.assertEqual(len(result_df), 1) + self.assertSetEqual(set(result_df.columns), {'section', 'url', 'other_field'}) + + # Assert that validate_url_reachability was called with the expected arguments + mock_validate_url_reachability.assert_called_with("section1", "http://example.com") + + # Assert that validate_instruction_fields was called with the expected arguments + mock_validate_instruction_fields.assert_called_with("section1", {"url": "http://example.com"}) + + finally: + # Clean up: remove the temporary file + Path(temp_file_path).unlink() + + def test_get_non_existing_records(self): + # Sample data for instructions_df + instructions_data = {'data_provider': ['A', 'B', 'C', 'D'], + 'layer_id': [1, 2, 3, 4], + 'section': ['S1', 'S2', 'S3', 'S4'], + 'url': ['url1', 'url2', 'url3', 'url4']} + instructions_df = pd.DataFrame(instructions_data) + + # Sample data for existing_layers_df + existing_data = {'data_provider': ['A', 'B', 'C'], + 'layer_id': [1, 2, 3], + 'section': ['S1', 'S2', 'S3'], + 'url': ['url1', 'url2', 'url3']} + existing_layers_df = pd.DataFrame(existing_data) + + # Call the function + with patch( + "src.digitaltwin.instructions_records_to_db.get_non_existing_records") as mock_get_non_existing_records: + # Set the return value of the mock to simulate the absence of 'url' column + mock_get_non_existing_records.return_value = instructions_df + + result = instructions_records_to_db.get_non_existing_records(instructions_df, existing_layers_df) + + # Assertions + self.assertIsInstance(result, pd.DataFrame) + self.assertEqual(len(result), len(instructions_df)) + + # Assert that get_non_existing_records was called with the expected arguments + mock_get_non_existing_records.assert_called_with(instructions_df, existing_layers_df) + + +if __name__ == '__main__': + unittest.main() From 2b8482969cc5d470b6f23042499b4aea62c0a026 Mon Sep 17 00:00:00 2001 From: Luke Parkinson Date: Thu, 11 Jul 2024 09:23:02 +1200 Subject: [PATCH 2/4] Move test files out of test data directory --- tests/test_digitaltwin/{data => }/test_data_to_db.py | 0 tests/test_digitaltwin/{data => }/test_get_data_using_geoapis.py | 0 .../{data => }/test_instructions_records_to_db.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/test_digitaltwin/{data => }/test_data_to_db.py (100%) rename tests/test_digitaltwin/{data => }/test_get_data_using_geoapis.py (100%) rename tests/test_digitaltwin/{data => }/test_instructions_records_to_db.py (100%) diff --git a/tests/test_digitaltwin/data/test_data_to_db.py b/tests/test_digitaltwin/test_data_to_db.py similarity index 100% rename from tests/test_digitaltwin/data/test_data_to_db.py rename to tests/test_digitaltwin/test_data_to_db.py diff --git a/tests/test_digitaltwin/data/test_get_data_using_geoapis.py b/tests/test_digitaltwin/test_get_data_using_geoapis.py similarity index 100% rename from tests/test_digitaltwin/data/test_get_data_using_geoapis.py rename to tests/test_digitaltwin/test_get_data_using_geoapis.py diff --git a/tests/test_digitaltwin/data/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py similarity index 100% rename from tests/test_digitaltwin/data/test_instructions_records_to_db.py rename to tests/test_digitaltwin/test_instructions_records_to_db.py From e095ab7838a03ef7fdb910293eda69eb3da8f959 Mon Sep 17 00:00:00 2001 From: Luke Parkinson Date: Fri, 19 Jul 2024 13:30:34 +1200 Subject: [PATCH 3/4] Remove tests that mostly test mocked objects without implementation --- tests/test_digitaltwin/test_data_to_db.py | 36 ---------------- .../test_get_data_using_geoapis.py | 42 +------------------ .../test_instructions_records_to_db.py | 39 +++-------------- 3 files changed, 7 insertions(+), 110 deletions(-) delete mode 100644 tests/test_digitaltwin/test_data_to_db.py diff --git a/tests/test_digitaltwin/test_data_to_db.py b/tests/test_digitaltwin/test_data_to_db.py deleted file mode 100644 index 2dc405cc1..000000000 --- a/tests/test_digitaltwin/test_data_to_db.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -import pandas as pd - -from src.digitaltwin.setup_environment import get_database -from src.digitaltwin.data_to_db import get_nz_geospatial_layers - -class TestDataToDB(unittest.TestCase): - @classmethod - @patch("src.digitaltwin.setup_environment.get_connection_from_profile", autospec=True) - def setUpClass(cls, mock_get_connection): - # Set up a mock database engine - mock_engine = MagicMock() - - # Mock the SQL query result - mock_query_result = pd.DataFrame({ - 'column1': [1, 2, 3], - 'column2': ['A', 'B', 'C'] - # Add more columns as needed - }) - - # Configure the mock engine to return the query result - mock_engine.execute.return_value.fetchall.return_value = mock_query_result.values - - # Mock the database connection setup function - mock_get_connection.return_value = mock_engine - - # Call the function with the mock engine - cls.dataframe_output = get_nz_geospatial_layers(mock_engine) - - def test_get_nz_geospatial_layers_correct_frame_type(self): - """Test to ensure tabular data is returned in DataFrame format.""" - self.assertIsInstance(self.dataframe_output, pd.DataFrame) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_digitaltwin/test_get_data_using_geoapis.py b/tests/test_digitaltwin/test_get_data_using_geoapis.py index 0df931b1c..481bb9365 100644 --- a/tests/test_digitaltwin/test_get_data_using_geoapis.py +++ b/tests/test_digitaltwin/test_get_data_using_geoapis.py @@ -1,50 +1,10 @@ import unittest -import geopandas as gpd -from shapely.geometry import Point -from unittest.mock import patch, MagicMock -from src.digitaltwin import get_data_using_geoapis - -def run(layer_id): - # Return a sample GeoDataFrame for testing - data = { - 'Name': ['Feature 1', 'Feature 2'], - 'Value': [10, 20], - 'geometry': [Point(1, 3), Point(2, 4)] - } - return gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326') +from src.digitaltwin import get_data_using_geoapis class TestFetchVectorDataUsingGeoApis(unittest.TestCase): - @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) - def test_fetch_vector_data_statsnz(self): - # Test fetching vector data from StatsNZ - result = get_data_using_geoapis.fetch_vector_data_using_geoapis("StatsNZ", 1) - self.assertIsInstance(result, gpd.GeoDataFrame) - # Add more assertions based on the expected behavior of the function - - @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) - def test_fetch_vector_data_linz(self): - # Test fetching vector data from LINZ - result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LINZ", 2) - self.assertIsInstance(result, gpd.GeoDataFrame) - # Add more assertions based on the expected behavior of the function - - @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) - def test_fetch_vector_data_lris(self): - # Test fetching vector data from LRIS - result = get_data_using_geoapis.fetch_vector_data_using_geoapis("LRIS", 3) - self.assertIsInstance(result, gpd.GeoDataFrame) - # Add more assertions based on the expected behavior of the function - - @patch('src.digitaltwin.get_data_using_geoapis.config.get_env_variable', MagicMock(return_value='test_api_key')) - def test_fetch_vector_data_mfe(self): - # Test fetching vector data from MFE - result = get_data_using_geoapis.fetch_vector_data_using_geoapis("MFE", 4) - self.assertIsInstance(result, gpd.GeoDataFrame) - # Add more assertions based on the expected behavior of the function - def test_fetch_vector_data_unsupported_provider(self): # Test raising ValueError for an unsupported data provider with self.assertRaises(ValueError): diff --git a/tests/test_digitaltwin/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py index 0fb6e89d3..0f27fc4c2 100644 --- a/tests/test_digitaltwin/test_instructions_records_to_db.py +++ b/tests/test_digitaltwin/test_instructions_records_to_db.py @@ -1,10 +1,13 @@ +import tempfile import unittest +from pathlib import Path from unittest.mock import patch + +import pandas as pd import requests + from src.digitaltwin import instructions_records_to_db -import pandas as pd -from pathlib import Path -import tempfile + class TestInstructionsRecordsToDb(unittest.TestCase): def test_validate_url_reachability_valid_url(self): @@ -100,36 +103,6 @@ def test_read_and_check_instructions_file(self): # Clean up: remove the temporary file Path(temp_file_path).unlink() - def test_get_non_existing_records(self): - # Sample data for instructions_df - instructions_data = {'data_provider': ['A', 'B', 'C', 'D'], - 'layer_id': [1, 2, 3, 4], - 'section': ['S1', 'S2', 'S3', 'S4'], - 'url': ['url1', 'url2', 'url3', 'url4']} - instructions_df = pd.DataFrame(instructions_data) - - # Sample data for existing_layers_df - existing_data = {'data_provider': ['A', 'B', 'C'], - 'layer_id': [1, 2, 3], - 'section': ['S1', 'S2', 'S3'], - 'url': ['url1', 'url2', 'url3']} - existing_layers_df = pd.DataFrame(existing_data) - - # Call the function - with patch( - "src.digitaltwin.instructions_records_to_db.get_non_existing_records") as mock_get_non_existing_records: - # Set the return value of the mock to simulate the absence of 'url' column - mock_get_non_existing_records.return_value = instructions_df - - result = instructions_records_to_db.get_non_existing_records(instructions_df, existing_layers_df) - - # Assertions - self.assertIsInstance(result, pd.DataFrame) - self.assertEqual(len(result), len(instructions_df)) - - # Assert that get_non_existing_records was called with the expected arguments - mock_get_non_existing_records.assert_called_with(instructions_df, existing_layers_df) - if __name__ == '__main__': unittest.main() From ba1a9e6e06f3c38c1169783928d7d976fe0316c5 Mon Sep 17 00:00:00 2001 From: Luke Parkinson Date: Fri, 19 Jul 2024 13:30:57 +1200 Subject: [PATCH 4/4] Fix broken test --- tests/test_digitaltwin/test_instructions_records_to_db.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_digitaltwin/test_instructions_records_to_db.py b/tests/test_digitaltwin/test_instructions_records_to_db.py index 0f27fc4c2..40ace1964 100644 --- a/tests/test_digitaltwin/test_instructions_records_to_db.py +++ b/tests/test_digitaltwin/test_instructions_records_to_db.py @@ -76,7 +76,7 @@ def test_validate_instruction_fields_invalid_neither_field_provided(self): def test_read_and_check_instructions_file(self): # Create a temporary file with sample data with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file: - temp_file.write('{"section1": {"url": "http://example.com", "other_field": "value"}}') + temp_file.write('{"section1": {"url": "https://example.com", "other_field": "value"}}') temp_file_path = temp_file.name try: @@ -94,10 +94,11 @@ def test_read_and_check_instructions_file(self): self.assertSetEqual(set(result_df.columns), {'section', 'url', 'other_field'}) # Assert that validate_url_reachability was called with the expected arguments - mock_validate_url_reachability.assert_called_with("section1", "http://example.com") + mock_validate_url_reachability.assert_called_with("section1", "https://example.com") # Assert that validate_instruction_fields was called with the expected arguments - mock_validate_instruction_fields.assert_called_with("section1", {"url": "http://example.com"}) + mock_validate_instruction_fields.assert_called_with("section1", {"url": "https://example.com", + 'other_field': 'value'}) finally: # Clean up: remove the temporary file