diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 87276b6c..3bb6e130 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,6 +36,29 @@ jobs: id: read-version run: echo "version=$(cat c2pa-native-version.txt | tr -d '\r\n')" >> $GITHUB_OUTPUT + check-format: + name: Check code format + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install development dependencies + run: python -m pip install -r requirements-dev.txt + + - name: Check Python syntax + run: python3 -m py_compile src/c2pa/c2pa.py + continue-on-error: true + + - name: Check code style with flake8 + run: flake8 src/c2pa/c2pa.py + continue-on-error: true + tests-unix: name: Unit tests for developer setup (Unix) needs: read-version diff --git a/pyproject.toml b/pyproject.toml index 83afff78..7a98b4a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "c2pa-python" -version = "0.25.0" +version = "0.26.0" requires-python = ">=3.10" description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library" readme = { file = "README.md", content-type = "text/markdown" } diff --git a/src/c2pa/c2pa.py b/src/c2pa/c2pa.py index 35dd457e..99617750 100644 --- a/src/c2pa/c2pa.py +++ b/src/c2pa/c2pa.py @@ -1358,6 +1358,10 @@ def __init__(self, # we may have opened ourselves, and that we need to close later self._backing_file = None + # Caches for manifest JSON string and parsed data + self._manifest_json_str_cache = None + self._manifest_data_cache = None + if stream is None: # If we don't get a stream as param: # Create a stream from the file path in format_or_path @@ -1600,6 +1604,33 @@ def _cleanup_resources(self): # Ensure we don't raise exceptions during cleanup pass + def _get_cached_manifest_data(self) -> Optional[dict]: + """Get 
the cached manifest data, fetching and parsing if not cached. + + Returns: + A dictionary containing the parsed manifest data, or None if + JSON parsing fails + + Raises: + C2paError: If there was an error getting the JSON + """ + if self._manifest_data_cache is None: + if self._manifest_json_str_cache is None: + self._manifest_json_str_cache = self.json() + + try: + self._manifest_data_cache = json.loads( + self._manifest_json_str_cache + ) + except json.JSONDecodeError: + # Reset cache to reattempt read, possibly + self._manifest_data_cache = None + self._manifest_json_str_cache = None + # Failed to parse manifest JSON + return None + + return self._manifest_data_cache + def close(self): """Release the reader resources. @@ -1620,6 +1651,9 @@ def close(self): Reader._ERROR_MESSAGES['cleanup_error'].format( str(e))) finally: + # Clear the cache when closing + self._manifest_json_str_cache = None + self._manifest_data_cache = None self._closed = True def json(self) -> str: @@ -1634,6 +1668,10 @@ def json(self) -> str: self._ensure_valid_state() + # Return cached result if available + if self._manifest_json_str_cache is not None: + return self._manifest_json_str_cache + result = _lib.c2pa_reader_json(self._reader) if result is None: @@ -1642,7 +1680,128 @@ def json(self) -> str: raise C2paError(error) raise C2paError("Error during manifest parsing in Reader") - return _convert_to_py_string(result) + # Cache the result and return it + self._manifest_json_str_cache = _convert_to_py_string(result) + return self._manifest_json_str_cache + + def get_active_manifest(self) -> Optional[dict]: + """Get the active manifest from the manifest store. + + This method retrieves the full manifest JSON and extracts the active + manifest based on the active_manifest key. + + Returns: + A dictionary containing the active manifest data, including claims, + assertions, ingredients, and signature information, or None if no + manifest is found or if there was an error parsing the JSON. 
+ + Raises: + KeyError: If the active_manifest key is missing from the JSON + """ + try: + # Get cached manifest data + manifest_data = self._get_cached_manifest_data() + if manifest_data is None: + # raise C2paError("Failed to parse manifest JSON") + return None + + # Get the active manifest id/label + if "active_manifest" not in manifest_data: + raise KeyError("No 'active_manifest' key found") + + active_manifest_id = manifest_data["active_manifest"] + + # Retrieve the active manifest data using manifest id/label + if "manifests" not in manifest_data: + raise KeyError("No 'manifests' key found in manifest data") + + manifests = manifest_data["manifests"] + if active_manifest_id not in manifests: + raise KeyError("Active manifest not found in manifest store") + + return manifests[active_manifest_id] + except C2paError.ManifestNotFound: + return None + + def get_manifest(self, label: str) -> Optional[dict]: + """Get a specific manifest from the manifest store by its label. + + This method retrieves the manifest JSON and extracts the manifest + that corresponds to the provided manifest label/ID. + + Args: + label: The manifest label/ID to look up in the manifest store + + Returns: + A dictionary containing the manifest data for the specified label, + or None if no manifest is found or if there was an error parsing + the JSON. 
+ + Raises: + KeyError: If the manifests key is missing from the JSON + """ + try: + # Get cached manifest data + manifest_data = self._get_cached_manifest_data() + if manifest_data is None: + # raise C2paError("Failed to parse manifest JSON") + return None + + if "manifests" not in manifest_data: + raise KeyError("No 'manifests' key found in manifest data") + + manifests = manifest_data["manifests"] + if label not in manifests: + raise KeyError(f"Manifest {label} not found in manifest store") + + return manifests[label] + except C2paError.ManifestNotFound: + return None + + def get_validation_state(self) -> Optional[str]: + """Get the validation state of the manifest store. + + This method retrieves the full manifest JSON and extracts the + validation_state field, which indicates the overall validation + status of the C2PA manifest. + + Returns: + The validation state as a string, + or None if the validation_state field is not present or if no + manifest is found or if there was an error parsing the JSON. + """ + try: + # Get cached manifest data + manifest_data = self._get_cached_manifest_data() + if manifest_data is None: + return None + + return manifest_data.get("validation_state") + except C2paError.ManifestNotFound: + return None + + def get_validation_results(self) -> Optional[dict]: + """Get the validation results of the manifest store. + + This method retrieves the full manifest JSON and extracts + the validation_results object, which contains detailed + validation information. + + Returns: + The validation results as a dictionary containing + validation details, or None if the validation_results + field is not present or if no manifest is found or if + there was an error parsing the JSON. 
+ """ + try: + # Get cached manifest data + manifest_data = self._get_cached_manifest_data() + if manifest_data is None: + return None + + return manifest_data.get("validation_results") + except C2paError.ManifestNotFound: + return None def resource_to_stream(self, uri: str, stream: Any) -> int: """Write a resource to a stream. diff --git a/tests/test_unit_tests.py b/tests/test_unit_tests.py index 6904ca86..7575aba7 100644 --- a/tests/test_unit_tests.py +++ b/tests/test_unit_tests.py @@ -64,6 +64,72 @@ def test_stream_read(self): json_data = reader.json() self.assertIn(DEFAULT_TEST_FILE_NAME, json_data) + def test_get_active_manifest(self): + with open(self.testPath, "rb") as file: + reader = Reader("image/jpeg", file) + active_manifest = reader.get_active_manifest() + + # Check the returned manifest label/key + expected_label = "contentauth:urn:uuid:c85a2b90-f1a0-4aa4-b17f-f938b475804e" + self.assertEqual(active_manifest["label"], expected_label) + + def test_get_manifest(self): + with open(self.testPath, "rb") as file: + reader = Reader("image/jpeg", file) + + # Test getting manifest by the specific label + label = "contentauth:urn:uuid:c85a2b90-f1a0-4aa4-b17f-f938b475804e" + manifest = reader.get_manifest(label) + self.assertEqual(manifest["label"], label) + + # It should be the active manifest too, so cross-check + active_manifest = reader.get_active_manifest() + self.assertEqual(manifest, active_manifest) + + def test_stream_get_non_active_manifest_by_label(self): + video_path = os.path.join(FIXTURES_DIR, "video1.mp4") + with open(video_path, "rb") as file: + reader = Reader("video/mp4", file) + + non_active_label = "urn:uuid:54281c07-ad34-430e-bea5-112a18facf0b" + non_active_manifest = reader.get_manifest(non_active_label) + self.assertEqual(non_active_manifest["label"], non_active_label) + + # Verify it's not the active manifest + # (that test case has only one other manifest that is not the active manifest) + active_manifest = reader.get_active_manifest() + 
self.assertNotEqual(non_active_manifest, active_manifest) + self.assertNotEqual(non_active_manifest["label"], active_manifest["label"]) + + def test_stream_get_non_active_manifest_by_label_not_found(self): + video_path = os.path.join(FIXTURES_DIR, "video1.mp4") + with open(video_path, "rb") as file: + reader = Reader("video/mp4", file) + + # Try to get a manifest with a label that clearly doesn't exist... + non_existing_label = "urn:uuid:clearly-not-existing" + with self.assertRaises(KeyError): + reader.get_manifest(non_existing_label) + + def test_stream_read_get_validation_state(self): + with open(self.testPath, "rb") as file: + reader = Reader("image/jpeg", file) + validation_state = reader.get_validation_state() + self.assertIsNotNone(validation_state) + self.assertEqual(validation_state, "Valid") + + def test_stream_read_get_validation_results(self): + with open(self.testPath, "rb") as file: + reader = Reader("image/jpeg", file) + validation_results = reader.get_validation_results() + + self.assertIsNotNone(validation_results) + self.assertIsInstance(validation_results, dict) + + self.assertIn("activeManifest", validation_results) + active_manifest_results = validation_results["activeManifest"] + self.assertIsInstance(active_manifest_results, dict) + def test_reader_detects_unsupported_mimetype_on_stream(self): with open(self.testPath, "rb") as file: with self.assertRaises(Error.NotSupported): @@ -270,6 +336,115 @@ def test_read_all_files_using_extension(self): except Exception as e: self.fail(f"Failed to read metadata from {filename}: {str(e)}") + def test_read_cached_all_files(self): + """Test reading C2PA metadata with cache functionality from all files in the fixtures/files-for-reading-tests directory""" + reading_dir = os.path.join(self.data_dir, "files-for-reading-tests") + + # Map of file extensions to MIME types + mime_types = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.webp': 'image/webp', + 
'.heic': 'image/heic', + '.heif': 'image/heif', + '.avif': 'image/avif', + '.tif': 'image/tiff', + '.tiff': 'image/tiff', + '.mp4': 'video/mp4', + '.avi': 'video/x-msvideo', + '.mp3': 'audio/mpeg', + '.m4a': 'audio/mp4', + '.wav': 'audio/wav', + '.pdf': 'application/pdf', + } + + # Skip system files + skip_files = { + '.DS_Store' + } + + for filename in os.listdir(reading_dir): + if filename in skip_files: + continue + + file_path = os.path.join(reading_dir, filename) + if not os.path.isfile(file_path): + continue + + # Get file extension and corresponding MIME type + _, ext = os.path.splitext(filename) + ext = ext.lower() + if ext not in mime_types: + continue + + mime_type = mime_types[ext] + + try: + with open(file_path, "rb") as file: + reader = Reader(mime_type, file) + + # Test 1: Verify cache variables are initially None + self.assertIsNone(reader._manifest_json_str_cache, f"JSON cache should be None initially for {filename}") + self.assertIsNone(reader._manifest_data_cache, f"Manifest data cache should be None initially for {filename}") + + # Test 2: Multiple calls to json() should return the same result and use cache + json_data_1 = reader.json() + self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache not set after first json() call for {filename}") + self.assertEqual(json_data_1, reader._manifest_json_str_cache, f"JSON cache doesn't match return value for {filename}") + + json_data_2 = reader.json() + self.assertEqual(json_data_1, json_data_2, f"JSON inconsistency for {filename}") + self.assertIsInstance(json_data_1, str) + + # Test 3: Test methods that use the cache + try: + # Test get_active_manifest() which uses _get_cached_manifest_data() + active_manifest = reader.get_active_manifest() + self.assertIsInstance(active_manifest, dict, f"Active manifest not dict for {filename}") + + # Test 4: Verify cache is set after calling cache-using methods + self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache not set after 
get_active_manifest for {filename}") + self.assertIsNotNone(reader._manifest_data_cache, f"Manifest data cache not set after get_active_manifest for {filename}") + + # Test 5: Multiple calls to cache-using methods should return the same result + active_manifest_2 = reader.get_active_manifest() + self.assertEqual(active_manifest, active_manifest_2, f"Active manifest cache inconsistency for {filename}") + + # Test get_validation_state() which uses the cache + validation_state = reader.get_validation_state() + # validation_state can be None, so just check it doesn't crash + + # Test get_validation_results() which uses the cache + validation_results = reader.get_validation_results() + # validation_results can be None, so just check it doesn't crash + + # Test 6: Multiple calls to validation methods should return the same result + validation_state_2 = reader.get_validation_state() + self.assertEqual(validation_state, validation_state_2, f"Validation state cache inconsistency for {filename}") + + validation_results_2 = reader.get_validation_results() + self.assertEqual(validation_results, validation_results_2, f"Validation results cache inconsistency for {filename}") + + except KeyError as e: + # Some files might not have active manifests or validation data + # This is expected for some test files, so we'll skip cache testing for those + pass + + # Test 7: Verify the manifest contains expected fields + manifest = json.loads(json_data_1) + self.assertIn("manifests", manifest) + self.assertIn("active_manifest", manifest) + + # Test 8: Test cache clearing on close + reader.close() + self.assertIsNone(reader._manifest_json_str_cache, f"JSON cache not cleared for {filename}") + self.assertIsNone(reader._manifest_data_cache, f"Manifest data cache not cleared for {filename}") + + except Exception as e: + self.fail(f"Failed to read cached metadata from {filename}: {str(e)}") + def test_reader_context_manager_with_exception(self): """Test Reader state after exception in context 
manager.""" try: @@ -430,6 +605,67 @@ def test_reader_get_remote_url(self): self.assertEqual(remote_url, "https://cai-manifests.adobe.com/manifests/adobe-urn-uuid-5f37e182-3687-462e-a7fb-573462780391") self.assertFalse(reader.is_embedded()) + def test_stream_read_and_parse_cached(self): + """Test reading and parsing with cache verification by repeating operations multiple times""" + with open(self.testPath, "rb") as file: + reader = Reader("image/jpeg", file) + + # Verify cache starts as None + self.assertIsNone(reader._manifest_json_str_cache, "JSON cache should be None initially") + self.assertIsNone(reader._manifest_data_cache, "Manifest data cache should be None initially") + + # First operation - should populate cache + manifest_store_1 = json.loads(reader.json()) + title_1 = manifest_store_1["manifests"][manifest_store_1["active_manifest"]]["title"] + self.assertEqual(title_1, DEFAULT_TEST_FILE_NAME) + + # Verify cache is populated after first json() call + self.assertIsNotNone(reader._manifest_json_str_cache, "JSON cache should be set after first json() call") + self.assertEqual(manifest_store_1, json.loads(reader._manifest_json_str_cache), "Cached JSON should match parsed result") + + # Repeat the same operation multiple times to verify cache usage + for i in range(5): + manifest_store = json.loads(reader.json()) + title = manifest_store["manifests"][manifest_store["active_manifest"]]["title"] + self.assertEqual(title, DEFAULT_TEST_FILE_NAME, f"Title should be consistent on iteration {i+1}") + + # Verify cache is still populated and consistent + self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache should remain set on iteration {i+1}") + self.assertEqual(manifest_store, json.loads(reader._manifest_json_str_cache), f"Cached JSON should match parsed result on iteration {i+1}") + + # Test methods that use the cache + # Test get_active_manifest() which uses _get_cached_manifest_data() + active_manifest_1 = reader.get_active_manifest() + 
self.assertIsInstance(active_manifest_1, dict, "Active manifest should be a dict") + + # Verify manifest data cache is populated + self.assertIsNotNone(reader._manifest_data_cache, "Manifest data cache should be set after get_active_manifest()") + + # Repeat get_active_manifest() multiple times to verify cache usage + for i in range(3): + active_manifest = reader.get_active_manifest() + self.assertEqual(active_manifest_1, active_manifest, f"Active manifest should be consistent on iteration {i+1}") + + # Verify cache remains populated + self.assertIsNotNone(reader._manifest_data_cache, f"Manifest data cache should remain set on iteration {i+1}") + + # Test get_validation_state() and get_validation_results() with cache + validation_state_1 = reader.get_validation_state() + validation_results_1 = reader.get_validation_results() + + # Repeat validation methods to verify cache usage + for i in range(3): + validation_state = reader.get_validation_state() + validation_results = reader.get_validation_results() + + self.assertEqual(validation_state_1, validation_state, f"Validation state should be consistent on iteration {i+1}") + self.assertEqual(validation_results_1, validation_results, f"Validation results should be consistent on iteration {i+1}") + + # Verify cache clearing on close + reader.close() + self.assertIsNone(reader._manifest_json_str_cache, "JSON cache should be cleared on close") + self.assertIsNone(reader._manifest_data_cache, "Manifest data cache should be cleared on close") + # TODO: Unskip once fixed configuration to read data is clarified # def test_read_cawg_data_file(self): # """Test reading C2PA metadata from C_with_CAWG_data.jpg file.""" diff --git a/tests/test_unit_tests_threaded.py b/tests/test_unit_tests_threaded.py index 3d3b6f1e..9a00dd6a 100644 --- a/tests/test_unit_tests_threaded.py +++ b/tests/test_unit_tests_threaded.py @@ -165,6 +165,156 @@ def process_file(filename): if errors: self.fail("\n".join(errors)) + def 
test_read_cached_all_files(self): + """Test reading C2PA metadata with cache functionality from all files in the fixtures/files-for-reading-tests directory using multithreading""" + reading_dir = os.path.join(self.data_dir, "files-for-reading-tests") + + # Map of file extensions to MIME types + mime_types = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.webp': 'image/webp', + '.heic': 'image/heic', + '.heif': 'image/heif', + '.avif': 'image/avif', + '.tif': 'image/tiff', + '.tiff': 'image/tiff', + '.mp4': 'video/mp4', + '.avi': 'video/x-msvideo', + '.mp3': 'audio/mpeg', + '.m4a': 'audio/mp4', + '.wav': 'audio/wav', + '.pdf': 'application/pdf', + } + + # Skip system files + skip_files = { + '.DS_Store' + } + + def process_file_with_cache(filename): + if filename in skip_files: + return None + + file_path = os.path.join(reading_dir, filename) + if not os.path.isfile(file_path): + return None + + # Get file extension and corresponding MIME type + _, ext = os.path.splitext(filename) + ext = ext.lower() + if ext not in mime_types: + return None + + mime_type = mime_types[ext] + + try: + with open(file_path, "rb") as file: + reader = Reader(mime_type, file) + + # Test 1: Verify cache variables are initially None + if reader._manifest_json_str_cache is not None: + return f"JSON cache should be None initially for {filename}" + if reader._manifest_data_cache is not None: + return f"Manifest data cache should be None initially for {filename}" + + # Test 2: Multiple calls to json() should return the same result and use cache + json_data_1 = reader.json() + if reader._manifest_json_str_cache is None: + return f"JSON cache not set after first json() call for {filename}" + if json_data_1 != reader._manifest_json_str_cache: + return f"JSON cache doesn't match return value for {filename}" + + json_data_2 = reader.json() + if json_data_1 != json_data_2: + return f"JSON inconsistency for {filename}" + if not 
isinstance(json_data_1, str): + return f"JSON data is not a string for {filename}" + + # Test 3: Test methods that use the cache + try: + # Test get_active_manifest() which uses _get_cached_manifest_data() + active_manifest = reader.get_active_manifest() + if not isinstance(active_manifest, dict): + return f"Active manifest not dict for {filename}" + + # Test 4: Verify cache is set after calling cache-using methods + if reader._manifest_json_str_cache is None: + return f"JSON cache not set after get_active_manifest for {filename}" + if reader._manifest_data_cache is None: + return f"Manifest data cache not set after get_active_manifest for {filename}" + + # Test 5: Multiple calls to cache-using methods should return the same result + active_manifest_2 = reader.get_active_manifest() + if active_manifest != active_manifest_2: + return f"Active manifest cache inconsistency for {filename}" + + # Test get_validation_state() which uses the cache + validation_state = reader.get_validation_state() + # validation_state can be None, so just check it doesn't crash + + # Test get_validation_results() which uses the cache + validation_results = reader.get_validation_results() + # validation_results can be None, so just check it doesn't crash + + # Test 6: Multiple calls to validation methods should return the same result + validation_state_2 = reader.get_validation_state() + if validation_state != validation_state_2: + return f"Validation state cache inconsistency for {filename}" + + validation_results_2 = reader.get_validation_results() + if validation_results != validation_results_2: + return f"Validation results cache inconsistency for {filename}" + + except KeyError: + # Some files might not have active manifests or validation data + # This is expected for some test files, so we'll skip cache testing for those + pass + + # Test 7: Verify the manifest contains expected fields + manifest = json.loads(json_data_1) + if "manifests" not in manifest: + return f"Missing 
'manifests' key in {filename}" + if "active_manifest" not in manifest: + return f"Missing 'active_manifest' key in {filename}" + + # Test 8: Test cache clearing on close + reader.close() + if reader._manifest_json_str_cache is not None: + return f"JSON cache not cleared for {filename}" + if reader._manifest_data_cache is not None: + return f"Manifest data cache not cleared for {filename}" + + return None # Success case returns None + + except Exception as e: + return f"Failed to read cached metadata from {filename}: {str(e)}" + + # Create a thread pool with 6 workers + with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: + # Submit all files to the thread pool + future_to_file = { + executor.submit(process_file_with_cache, filename): filename + for filename in os.listdir(reading_dir) + } + + # Collect results as they complete + errors = [] + for future in concurrent.futures.as_completed(future_to_file): + filename = future_to_file[future] + try: + error = future.result() + if error: + errors.append(error) + except Exception as e: + errors.append( + f"Unexpected error processing {filename}: {str(e)}") + + # If any errors occurred, fail the test with all error messages + if errors: + self.fail("\n".join(errors)) class TestBuilderWithThreads(unittest.TestCase): def setUp(self):