From aca4e407dd6541316791469d285a35f91c114b77 Mon Sep 17 00:00:00 2001 From: Lavanya Ashokkumar Date: Thu, 2 Oct 2025 12:58:18 -0500 Subject: [PATCH 1/5] CMR latest version #355 --- pyQuARC/main.py | 76 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/pyQuARC/main.py b/pyQuARC/main.py index 6995b50c..d6af5bd0 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -138,6 +138,60 @@ def _cmr_query(self): query = f"{orig_query}&page_num={page_num}" return concept_ids + + def _get_latest_version(self, concept_id): + """ + Fetches the latest revision version for a given concept_id from CMR + + Args: + concept_id (str): The concept ID to query + + Returns: + str: The latest revision number, or None if not found + """ + try: + # Construct the CMR metadata URL for the concept + url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" + headers = get_headers() + response = requests.get(url, headers=headers) + + if response.status_code == 200: + # Extract revision-id from response headers + revision_id = response.headers.get('CMR-Revision-Id') + return revision_id + else: + print(f"Warning: Could not fetch latest version for {concept_id}. Using default.") + return None + except Exception as e: + print(f"Error fetching latest version for {concept_id}: {str(e)}") + return None + + def _get_collection_version(self, concept_id): + """ + Fetch the MetadataSpecification.Version of a collection from CMR. + Args: + concept_id (str): The concept ID to query. + + Returns: + str: The collection's MetadataSpecification.Version, or None if not found. + """ + try: + url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" + headers = get_headers() + response = requests.get(url, headers=headers) + + if response.status_code == 200: + data = response.json() + # UMM collections have MetadataSpecification.Version + version = data.get("MetadataSpecification", {}).get("Version") + return version + else: + print(f"Warning: Could not fetch metadata for {concept_id}.") + return None + except Exception as e: + print(f"Error fetching collection version for {concept_id}: {str(e)}") + return None + def _validate_with_cmr(self, concept_id, metadata_content): """ @@ -181,8 +235,20 @@ def validate(self): if self.concept_ids: for concept_id in tqdm(self.concept_ids): + # If no version specified, get the latest version + version_to_use = self.version + if not version_to_use: + version_to_use = self._get_latest_version(concept_id) + if version_to_use: + print(f"Using latest version {version_to_use} for {concept_id}") + + # Fetch schema version too + collection_version = self._get_collection_version(concept_id) + if collection_version: + print(f"Collection {concept_id} schema version: {collection_version}") + downloader = Downloader( - concept_id, self.metadata_format, self.version, self.cmr_host + concept_id, self.metadata_format, version_to_use, self.cmr_host ) if not (content := downloader.download()): self.errors.append( @@ -194,17 +260,11 @@ def validate(self): ) continue content = content.encode() - cmr_response = self._validate_with_cmr(concept_id, content) validation_errors, pyquarc_errors = checker.run(content) self.errors.append( { "concept_id": concept_id, "errors": validation_errors, - "cmr_validation": { - "errors": cmr_response.json().get("errors", []), - # TODO: show warnings - "warnings": cmr_response.json().get("warnings", []) - }, "pyquarc_errors": pyquarc_errors, } ) @@ -212,6 +272,7 @@ def validate(self): elif self.file_path: with open(os.path.abspath(self.file_path), "r") as myfile: content = myfile.read().encode() + validation_errors, pyquarc_errors = checker.run(content) self.errors.append( { @@ -388,3 +449,4 @@ def display_results(self): ) results = arc.validate() arc.display_results() + \ No newline at end of file From 649573063f31f564effd8ea40711f6f5c6803813 Mon Sep 17 00:00:00 2001 From: Lavanya Ashokkumar Date: Tue, 7 Oct 2025 14:23:00 -0500 Subject: [PATCH 2/5] Cmr latest version #355 - included changes in main.py --- pyQuARC/main.py | 5 ++++- requirements.txt | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyQuARC/main.py b/pyQuARC/main.py index d6af5bd0..4baf36f5 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -348,7 +348,10 @@ def display_results(self): f"\n\t {COLOR['title']}{COLOR['bright']} pyQuARC ERRORS: {END}\n" ) for error in pyquarc_errors: - error_prompt += f"\t\t ERROR: {error['type']}. Details: {error['details']} \n" + error_prompt += ( + f"\t\t ERROR: {error.get('message', 'No message available')} \n" + f"\t\t DETAILS: {error.get('details', 'No details available')} \n" + ) if cmr_validation := error.get("cmr_validation"): cmr_error_msg = self._format_cmr_error(cmr_validation) diff --git a/requirements.txt b/requirements.txt index 30aec17c..6432dc89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ colorama==0.4.4 idna==2.10 jsonschema==4.17.3 -lxml==4.9.1 +lxml==5.3.0 #4.9.1 pytest==5.4.3 pytz==2020.1 requests==2.24.0 From 40e520275db98036421b96c7a8e9dad14d0ed926 Mon Sep 17 00:00:00 2001 From: Lavanya Ashokkumar Date: Mon, 13 Oct 2025 17:11:44 -0500 Subject: [PATCH 3/5] Code changes for _get_collection_version -LA --- pyQuARC/main.py | 82 +++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/pyQuARC/main.py b/pyQuARC/main.py index 4baf36f5..87c5edcb 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -139,58 +139,37 @@ def _cmr_query(self): return concept_ids - def _get_latest_version(self, concept_id): - """ - Fetches the latest revision version for a given concept_id from CMR - - Args: - concept_id (str): The concept ID to query - - Returns: - str: The latest revision number, or None if not found - """ - try: - # Construct the CMR metadata URL for the concept - url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" - headers = get_headers() - response = requests.get(url, headers=headers) - - if response.status_code == 200: - # Extract revision-id from response headers - revision_id = response.headers.get('CMR-Revision-Id') - return revision_id - else: - print(f"Warning: Could not fetch latest version for {concept_id}. Using default.") - return None - except Exception as e: - print(f"Error fetching latest version for {concept_id}: {str(e)}") - return None def _get_collection_version(self, concept_id): """ - Fetch the MetadataSpecification.Version of a collection from CMR. + Fetches collection information from CMR for a given concept_id. Args: concept_id (str): The concept ID to query. - + info_type (str): Type of information to fetch. + Options: "revision" or "metadata_version". + Returns: - str: The collection's MetadataSpecification.Version, or None if not found. + str: The requested info (revision ID or MetadataSpecification.Version), or None if not found. """ try: url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" headers = get_headers() response = requests.get(url, headers=headers) - if response.status_code == 200: - data = response.json() - # UMM collections have MetadataSpecification.Version - version = data.get("MetadataSpecification", {}).get("Version") - return version - else: - print(f"Warning: Could not fetch metadata for {concept_id}.") - return None + if response.status_code != 200: + print(f"Warning: Could not fetch data for {concept_id}. Status: {response.status_code}") + return {"revision_id": None, "metadata_version": None} + + data = response.json() if response.content else {} + return { + "revision_id": response.headers.get("CMR-Revision-Id"), + "metadata_version": data.get("MetadataSpecification", {}).get("Version"), + } + except Exception as e: - print(f"Error fetching collection version for {concept_id}: {str(e)}") - return None + # Unified error handling — return dict even on failure + print(f"Error fetching collection info for {concept_id}: {str(e)}") + return {"revision_id": None, "metadata_version": None} def _validate_with_cmr(self, concept_id, metadata_content): @@ -236,17 +215,16 @@ def validate(self): if self.concept_ids: for concept_id in tqdm(self.concept_ids): # If no version specified, get the latest version - version_to_use = self.version - if not version_to_use: - version_to_use = self._get_latest_version(concept_id) - if version_to_use: - print(f"Using latest version {version_to_use} for {concept_id}") - - # Fetch schema version too - collection_version = self._get_collection_version(concept_id) - if collection_version: - print(f"Collection {concept_id} schema version: {collection_version}") - + # Get both revision and metadata version in one call + info = self._get_collection_version(concept_id) + version_to_use = self.version or info["revision_id"] + metadata_version = info["metadata_version"] + + if version_to_use: + print(f"Using latest revision {version_to_use} for {concept_id}") + if metadata_version: + print(f"Collection {concept_id} schema version: {metadata_version}") + downloader = Downloader( concept_id, self.metadata_format, version_to_use, self.cmr_host ) @@ -259,6 +237,7 @@ def validate(self): } ) continue + content = content.encode() validation_errors, pyquarc_errors = checker.run(content) self.errors.append( @@ -272,7 +251,6 @@ def validate(self): elif self.file_path: with open(os.path.abspath(self.file_path), "r") as myfile: content = myfile.read().encode() - validation_errors, pyquarc_errors = checker.run(content) self.errors.append( { @@ -281,8 +259,10 @@ def validate(self): "pyquarc_errors": pyquarc_errors, } ) + return self.errors + @staticmethod def _error_message(messages): severities = ["error", "warning", "info"] From ed5a9412cbe55e8937e4924b45df0293d81c8035 Mon Sep 17 00:00:00 2001 From: Slesa Adhikari Date: Thu, 16 Oct 2025 15:20:46 -0500 Subject: [PATCH 4/5] Refactor and undo cmr_validation removal --- pyQuARC/main.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pyQuARC/main.py b/pyQuARC/main.py index 87c5edcb..20621ea6 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -145,20 +145,16 @@ def _get_collection_version(self, concept_id): Fetches collection information from CMR for a given concept_id. Args: concept_id (str): The concept ID to query. - info_type (str): Type of information to fetch. - Options: "revision" or "metadata_version". Returns: - str: The requested info (revision ID or MetadataSpecification.Version), or None if not found. + dict: {"revision_id": str | None, "metadata_version": str | None } A dict of Revision ID and Metadata Version of the collection. """ + failure_return_value = {"revision_id": None, "metadata_version": None} try: url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" headers = get_headers() response = requests.get(url, headers=headers) - - if response.status_code != 200: - print(f"Warning: Could not fetch data for {concept_id}. Status: {response.status_code}") - return {"revision_id": None, "metadata_version": None} + response.raise_for_status() data = response.json() if response.content else {} return { @@ -169,7 +165,7 @@ def _get_collection_version(self, concept_id): except Exception as e: # Unified error handling — return dict even on failure print(f"Error fetching collection info for {concept_id}: {str(e)}") - return {"revision_id": None, "metadata_version": None} + return failure_return_value def _validate_with_cmr(self, concept_id, metadata_content): @@ -218,12 +214,7 @@ def validate(self): # Get both revision and metadata version in one call info = self._get_collection_version(concept_id) version_to_use = self.version or info["revision_id"] - metadata_version = info["metadata_version"] - - if version_to_use: - print(f"Using latest revision {version_to_use} for {concept_id}") - if metadata_version: - print(f"Collection {concept_id} schema version: {metadata_version}") + # metadata_version = info["metadata_version"] downloader = Downloader( concept_id, self.metadata_format, version_to_use, self.cmr_host @@ -239,11 +230,17 @@ def validate(self): continue content = content.encode() + cmr_response = self._validate_with_cmr(concept_id, content) validation_errors, pyquarc_errors = checker.run(content) self.errors.append( { "concept_id": concept_id, "errors": validation_errors, + "cmr_validation": { + "errors": cmr_response.json().get("errors", []), + # TODO: show warnings + "warnings": cmr_response.json().get("warnings", []) + }, "pyquarc_errors": pyquarc_errors, } ) From 24d990968ffe8cf68d981e0e15d09bde57ad1130 Mon Sep 17 00:00:00 2001 From: Slesa Adhikari Date: Thu, 16 Oct 2025 15:31:13 -0500 Subject: [PATCH 5/5] Readd concept id printing --- pyQuARC/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyQuARC/main.py b/pyQuARC/main.py index 20621ea6..49458433 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -214,7 +214,10 @@ def validate(self): # Get both revision and metadata version in one call info = self._get_collection_version(concept_id) version_to_use = self.version or info["revision_id"] - # metadata_version = info["metadata_version"] + + metadata_version = info["metadata_version"] + if metadata_version: + print(f"Collection {concept_id} schema version: {metadata_version}") downloader = Downloader( concept_id, self.metadata_format, version_to_use, self.cmr_host