From fb9cc5edbe490164b24cba5c301df7415879c144 Mon Sep 17 00:00:00 2001 From: Lavanya Ashokkumar Date: Wed, 4 Jun 2025 14:33:20 -0500 Subject: [PATCH 1/2] Datacenter email - revised #326 --- pyQuARC/code/schema_validator.py | 11 ++++++----- pyQuARC/code/url_validator.py | 11 +++++++++++ pyQuARC/schemas/check_messages_override.json | 11 ++++++++++- pyQuARC/schemas/checks_override.json | 8 +++++++- pyQuARC/schemas/rules_override.json | 18 +++++++++++++++++- tests/test_downloader.py | 4 ++-- 6 files changed, 53 insertions(+), 10 deletions(-) diff --git a/pyQuARC/code/schema_validator.py b/pyQuARC/code/schema_validator.py index 11b3f087..8423e348 100644 --- a/pyQuARC/code/schema_validator.py +++ b/pyQuARC/code/schema_validator.py @@ -3,7 +3,7 @@ import re from io import BytesIO -from jsonschema import Draft7Validator, draft7_format_checker, RefResolver +from jsonschema import Draft7Validator, RefResolver from lxml import etree from urllib.request import pathname2url @@ -91,7 +91,7 @@ def run_json_validator(self, content_to_validate): resolver = RefResolver.from_schema(schema, store=schema_store) validator = Draft7Validator( - schema, format_checker=draft7_format_checker, resolver=resolver + schema, format_checker=Draft7Validator.FORMAT_CHECKER, resolver=resolver ) for error in sorted( @@ -136,13 +136,14 @@ def _build_errors(error_log, paths): # For DIF, because the namespace is specified in the metadata file, lxml library # provides field name concatenated with the namespace, # the following 3 lines of code removes the namespace - namespaces = re.findall("(\{http[^}]*\})", line) + + namespaces = re.findall(r"(\{http[^}]*\})", line) for namespace in namespaces: line = line.replace(namespace, "") - field_name = re.search("Element\s'(.*)':", line)[1] + field_name = re.search(r"Element\s'(.*)':", line)[1] field_paths = [abs_path for abs_path in paths if field_name in abs_path] field_name = field_paths[0] if len(field_paths) == 1 else field_name - message = re.search("Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip() + message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip() errors.setdefault(field_name, {})["schema"] = { "message": [f"Error: {message}"], "valid": False, diff --git a/pyQuARC/code/url_validator.py b/pyQuARC/code/url_validator.py index 55a74e61..9b6befd6 100644 --- a/pyQuARC/code/url_validator.py +++ b/pyQuARC/code/url_validator.py @@ -117,3 +117,14 @@ def doi_link_update(value, bad_urls): validity = False return {"valid": validity, "value": value} + + @staticmethod + @if_arg + def url_update_email_check(url, bad_urls): + validity = True + # Check if the URL matches 'support-cddis@earthdata.nasa.gov' + if url in bad_urls or url == "support-cddis@earthdata.nasa.gov": + # Update the URL + url = "support-cddis@nasa.gov" + validity = False # Mark as invalid if the URL was updated + return {"valid": validity, "value": url} diff --git a/pyQuARC/schemas/check_messages_override.json b/pyQuARC/schemas/check_messages_override.json index 0967ef42..9f7673ed 100644 --- a/pyQuARC/schemas/check_messages_override.json +++ b/pyQuARC/schemas/check_messages_override.json @@ -1 +1,10 @@ -{} +{ + "url_update_email_check": { + "failure": "The listed email contact information must be updated.", + "help": { + "message": "Recommend providing the updated contact information as per the data product.", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Data+Center" + }, + "remediation": "Recommend changing the contact information to 'support-cddis@nasa.gov'. " + } +} diff --git a/pyQuARC/schemas/checks_override.json b/pyQuARC/schemas/checks_override.json index 0967ef42..2f4d6fc1 100644 --- a/pyQuARC/schemas/checks_override.json +++ b/pyQuARC/schemas/checks_override.json @@ -1 +1,7 @@ -{} +{ + "url_update_email_check": { + "data_type": "url", + "check_function": "url_update_email_check", + "available": true + } +} diff --git a/pyQuARC/schemas/rules_override.json b/pyQuARC/schemas/rules_override.json index 0967ef42..2ecc4799 100644 --- a/pyQuARC/schemas/rules_override.json +++ b/pyQuARC/schemas/rules_override.json @@ -1 +1,17 @@ -{} +{ + "url_update_email_check": { + "rule_name": "URL Email address check", + "fields_to_apply": { + "umm-c": [ + { + "fields": [ + "DataCenters/ContactGroups/ContactInformation/ContactMechanisms/Value", + "DataCenters/ContactGroups/ContactInformation/ContactInstruction" + ] + } + ] + }, + "severity": "info", + "check_id": "url_update_email_check" +} +} diff --git a/tests/test_downloader.py b/tests/test_downloader.py index ddd7d5db..ca1762c8 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -9,11 +9,11 @@ class TestDownloader: def setup_method(self): self.concept_ids = { "collection": { - "real": "C1339230297-GES_DISC", + "real": "C1000000042-CDDIS", "dummy": "C123456-LPDAAC_ECS", }, "granule": { - "real": "G1370895082-GES_DISC", + "real": "G1018577631-CDDIS", "dummy": "G1000000002-CMR_PROV", }, "invalid": "asdfasdf", From a60d3eab8ed7a2b0ae4d0aa899d0d276f3628efa Mon Sep 17 00:00:00 2001 From: Lavanya Ashokkumar Date: Fri, 8 Aug 2025 15:17:11 -0500 Subject: [PATCH 2/2] Datacenter email - revised v1 #326 --- pyQuARC/schemas/check_messages.json | 8 ++++++++ pyQuARC/schemas/check_messages_override.json | 11 +---------- pyQuARC/schemas/checks.json | 5 +++++ pyQuARC/schemas/checks_override.json | 8 +------- pyQuARC/schemas/rule_mapping.json | 17 ++++++++++++++++- pyQuARC/schemas/rules_override.json | 18 +----------------- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/pyQuARC/schemas/check_messages.json b/pyQuARC/schemas/check_messages.json index 0b8b38c8..92cfa877 100644 --- a/pyQuARC/schemas/check_messages.json +++ b/pyQuARC/schemas/check_messages.json @@ -47,6 +47,14 @@ }, "remediation": "This often indicates a broken link. If the URL is broken, recommend revising." }, + "url_update_email_check": { + "failure": "The listed email contact information must be updated.", + "help": { + "message": "Recommend providing the updated contact information as per the data product.", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Data+Center" + }, + "remediation": "Recommend changing the contact information to 'support-cddis@nasa.gov'. " + }, "shortname_uniqueness": { "failure": "The EntryTitle/DataSetId `{}` is identical to the ShortName `{}`.", "help": { diff --git a/pyQuARC/schemas/check_messages_override.json b/pyQuARC/schemas/check_messages_override.json index 9f7673ed..0967ef42 100644 --- a/pyQuARC/schemas/check_messages_override.json +++ b/pyQuARC/schemas/check_messages_override.json @@ -1,10 +1 @@ -{ - "url_update_email_check": { - "failure": "The listed email contact information must be updated.", - "help": { - "message": "Recommend providing the updated contact information as per the data product.", - "url": "https://wiki.earthdata.nasa.gov/display/CMR/Data+Center" - }, - "remediation": "Recommend changing the contact information to 'support-cddis@nasa.gov'. " - } -} +{} diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json index 778f4da3..d921ac28 100644 --- a/pyQuARC/schemas/checks.json +++ b/pyQuARC/schemas/checks.json @@ -24,6 +24,11 @@ "check_function": "health_and_status_check", "available": true }, + "url_update_email_check": { + "data_type": "url", + "check_function": "url_update_email_check", + "available": true + }, "string_compare": { "data_type": "string", "check_function": "compare", diff --git a/pyQuARC/schemas/checks_override.json b/pyQuARC/schemas/checks_override.json index 2f4d6fc1..0967ef42 100644 --- a/pyQuARC/schemas/checks_override.json +++ b/pyQuARC/schemas/checks_override.json @@ -1,7 +1 @@ -{ - "url_update_email_check": { - "data_type": "url", - "check_function": "url_update_email_check", - "available": true - } -} +{} diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index 2e3acc41..214d6ddc 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -5559,5 +5559,20 @@ }, "severity": "warning", "check_id": "one_item_presence_check" - } + }, + "url_update_email_check": { + "rule_name": "URL Email address check", + "fields_to_apply": { + "umm-c": [ + { + "fields": [ + "DataCenters/ContactGroups/ContactInformation/ContactMechanisms/Value", + "DataCenters/ContactGroups/ContactInformation/ContactInstruction" + ] + } + ] + }, + "severity": "info", + "check_id": "url_update_email_check" +} } \ No newline at end of file diff --git a/pyQuARC/schemas/rules_override.json b/pyQuARC/schemas/rules_override.json index 2ecc4799..0967ef42 100644 --- a/pyQuARC/schemas/rules_override.json +++ b/pyQuARC/schemas/rules_override.json @@ -1,17 +1 @@ -{ - "url_update_email_check": { - "rule_name": "URL Email address check", - "fields_to_apply": { - "umm-c": [ - { - "fields": [ - "DataCenters/ContactGroups/ContactInformation/ContactMechanisms/Value", - "DataCenters/ContactGroups/ContactInformation/ContactInstruction" - ] - } - ] - }, - "severity": "info", - "check_id": "url_update_email_check" -} -} +{}