diff --git a/README.md b/README.md index 9331483e4..71e052422 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ Then run normally: `core.exe validate -rest -of -config -commands ### Updating the Cache (`update-cache`) -Update locally stored cache data (Requires an environment variable - `CDISC_LIBRARY_API_KEY`) This is stored in the .env folder in the root directory, the API key does not need quotations around it. When running a validation, CORE uses rules in the cache unless -lr is specified. Running the above command populates the cache with controlled terminology, rules, metadata, etc. +Update locally stored cache data (Requires an environment variable - `CDISC_LIBRARY_API_KEY`. If using a local/private proxy configured via `CDISC_LIBRARY_API_URL`, the API key may be set to `none`, depending on the proxy.) This is stored in the .env folder in the root directory, the API key does not need quotations around it. When running a validation, CORE uses rules in the cache unless -lr is specified. Running the above command populates the cache with controlled terminology, rules, metadata, etc. ```bash python core.py update-cache @@ -267,6 +267,9 @@ Update locally stored cache data (Requires an environment variable - `CDISC_LIBR To obtain an api key, please follow the instructions found here: . Please note it can take up to an hour after sign up to have an api key issued +**Private caching/proxy:** +Update .env with `CDISC_LIBRARY_API_URL`. This private/local proxy might not need your personal CDISC Library API key anymore, as the proxy server might be using its own key to query the CDISC Library API server. 
+ ##### Custom Standards and Rules ###### Custom Rules Management diff --git a/TestRule/__init__.py b/TestRule/__init__.py index 3aa56cd28..bf21bf9c1 100644 --- a/TestRule/__init__.py +++ b/TestRule/__init__.py @@ -82,7 +82,8 @@ def convert_numpy_types(obj): def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: # noqa try: json_data = req.get_json() - api_key = os.environ.get("CDISC_LIBRARY_API_KEY") + api_key = os.environ.get("CDISC_LIBRARY_API_KEY", "") # Default to empty string + api_url = os.environ.get("CDISC_LIBRARY_API_URL", "") # Default to empty string rule = json_data.get("rule") standards_data = json_data.get("standard", {}) standard = standards_data.get("product") @@ -91,7 +92,7 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: # standard, standard_version = normalize_adam_input(standard, standard_version) codelists = json_data.get("codelists", []) cache = InMemoryCacheService() - library_service = CDISCLibraryService(api_key, cache) + library_service = CDISCLibraryService(api_key, api_url, cache) cache_populator: CachePopulator = CachePopulator(cache, library_service) asyncio.run(cache_populator.load_available_ct_packages()) if standards_data or codelists: diff --git a/cdisc_rules_engine/config/config.py b/cdisc_rules_engine/config/config.py index 068802f49..95186f712 100644 --- a/cdisc_rules_engine/config/config.py +++ b/cdisc_rules_engine/config/config.py @@ -25,6 +25,7 @@ def __new__(cls): "REDIS_HOST_NAME", "REDIS_ACCESS_KEY", "CDISC_LIBRARY_API_KEY", + "CDISC_LIBRARY_API_URL", "DATA_SERVICE_TYPE", "DATASET_SIZE_THRESHOLD", ] diff --git a/cdisc_rules_engine/services/cdisc_library_service.py b/cdisc_rules_engine/services/cdisc_library_service.py index a9dfb5acd..fd4a4c24f 100644 --- a/cdisc_rules_engine/services/cdisc_library_service.py +++ b/cdisc_rules_engine/services/cdisc_library_service.py @@ -16,13 +16,25 @@ class CDISCLibraryService: - def __init__(self, api_key, cache_service_obj): - 
self._api_key = api_key + def __init__(self, api_key, api_url, cache_service_obj): + self._api_key = api_key if api_key else "" + + if not api_url: # covers None and empty string + api_url = "https://api.library.cdisc.org/api" + + # Validation: If using the default CDISC Library URL, API key is required + if api_url == "https://api.library.cdisc.org/api" and not self._api_key: + raise ValueError( + "CDISC_LIBRARY_API_KEY is required when using the default CDISC Library API URL. " + "Either provide an API key or specify a custom CDISC_LIBRARY_API_URL which does not require an API key." + ) + self._client = CDISCLibraryClient( - self._api_key, base_api_url="https://api.library.cdisc.org/api" + self._api_key, + base_api_url=api_url, ) self.cache = cache_service_obj - + def cache_library_json(self, uri: str) -> dict: """ Makes a library request to the provided URI, diff --git a/cdisc_rules_engine/services/data_services/base_data_service.py b/cdisc_rules_engine/services/data_services/base_data_service.py index 6090e2e80..cedc23e42 100644 --- a/cdisc_rules_engine/services/data_services/base_data_service.py +++ b/cdisc_rules_engine/services/data_services/base_data_service.py @@ -104,8 +104,11 @@ def __init__( self.cache_service = cache_service self._reader_factory = reader_factory self._config = config + self.cdisc_library_service: CDISCLibraryService = CDISCLibraryService( - self._config.getValue("CDISC_LIBRARY_API_KEY", ""), self.cache_service + self._config.getValue("CDISC_LIBRARY_API_KEY", ""), # Default to empty string + self._config.getValue("CDISC_LIBRARY_API_URL", ""), # Default to empty string + self.cache_service ) self.standard = kwargs.get("standard") self.version = (kwargs.get("standard_version") or "").replace(".", "-") diff --git a/core.py b/core.py index 681cdd85c..9792aa8c8 100644 --- a/core.py +++ b/core.py @@ -508,7 +508,19 @@ def validate( "Can be provided in the environment " "variable CDISC_LIBRARY_API_KEY" ), - required=True, + required=False, + 
default="", +) +@click.option( + "--apiurl", + envvar="CDISC_LIBRARY_API_URL", + help=( + "CDISC Library api URL (HTTPS/HTTP). Default: https://library.cdisc.org/api. " + "Can be provided in the environment " + "variable CDISC_LIBRARY_API_URL" + ), + required=False, + default="", ) @click.option( "-crd", @@ -562,6 +574,7 @@ def update_cache( ctx: click.Context, cache_path: str, apikey: str, + apiurl: str, custom_rules_directory: str, custom_rule: str, remove_custom_rules: str, @@ -569,8 +582,21 @@ def update_cache( custom_standard: str, remove_custom_standard: str, ): + logger = logging.getLogger("validator") + + # Validation: Ensure at least one is provided when using default URL + effective_url = apiurl if apiurl else "https://library.cdisc.org/api" + if effective_url == "https://library.cdisc.org/api" and not apikey: + logger.error( + "CDISC_LIBRARY_API_KEY is required when using the default CDISC Library API URL.\n" + "Either provide --apikey or set CDISC_LIBRARY_API_KEY environment variable,\n" + "or specify a custom URL with --apiurl or CDISC_LIBRARY_API_URL environment variable," + "which does or does not require specific API key for proxy access" + ) + ctx.exit(2) + cache = CacheServiceFactory(config).get_cache_service() - library_service = CDISCLibraryService(apikey, cache) + library_service = CDISCLibraryService(apikey, apiurl, cache) cache_populator = CachePopulator( cache, library_service, @@ -597,7 +623,6 @@ def update_cache( print("Cache updated successfully") - @click.command() @click.option( "-c", diff --git a/env.example b/env.example index b9849a804..9c34f7632 100644 --- a/env.example +++ b/env.example @@ -1,4 +1,5 @@ CDISC_LIBRARY_API_KEY=your_api_key_here +CDISC_LIBRARY_API_URL=http://localhost:31415/api # a smart proxy server will not use CDISC_LIBRARY_API_KEY, but might need its own key to query the upstream server. 
DATASET_SIZE_THRESHOLD=10485760 # max dataset size in bytes to force dask implementation MAX_REPORT_ROWS = 10 # integer for maximum number of issues per excel sheet (plus headers) in result report MAX_ERRORS_PER_RULE = (10, True) # Tuple for maximum number of errors to report per rule during a validation run. Also has a per dataset flag described as second bool value in readme. example value diff --git a/tests/unit/test_cdisc_library_service.py b/tests/unit/test_cdisc_library_service.py index 4728c119a..76c3b50a4 100644 --- a/tests/unit/test_cdisc_library_service.py +++ b/tests/unit/test_cdisc_library_service.py @@ -5,12 +5,34 @@ import json import os +import pytest from unittest.mock import MagicMock, patch from cdisc_rules_engine.config import config from cdisc_rules_engine.services.cdisc_library_service import CDISCLibraryService +def test_library_service_requires_key_for_default_url(): + """Test that API key is required when using default CDISC Library URL.""" + with pytest.raises(ValueError, match="CDISC_LIBRARY_API_KEY is required"): + CDISCLibraryService("", "", MagicMock()) + + +def test_library_service_allows_custom_url_without_key(): + """Test that custom URL works without API key.""" + # Should not raise an error + library_service = CDISCLibraryService( + "", "https://custom-library.example.com/api", MagicMock() + ) + assert library_service is not None + + +def test_library_service_with_key_and_default_url(): + """Test normal case with API key and default URL.""" + library_service = CDISCLibraryService("test-key", "", MagicMock()) + assert library_service is not None + + @patch( "cdisc_rules_engine.services.cdisc_library_service.CDISCLibraryClient.get_sdtmig" ) @@ -24,7 +46,7 @@ def test_get_standard_details(mock_get_sdtmig: MagicMock): mock_sdtmig_details: dict = json.loads(file.read()) mock_get_sdtmig.return_value = mock_sdtmig_details - library_service = CDISCLibraryService(config, MagicMock()) + library_service = CDISCLibraryService(config, "", 
MagicMock()) standard_details: dict = library_service.get_standard_details("sdtmig", "3-1-2") # expected is that mocked sdtmig details is extended with "domains" key assert standard_details == {