diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5972f21..3a77685 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0.1 +current_version = 2.0.2 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P(rc|dev))(?P\d+))? diff --git a/focus_validator/config_objects/json_loader.py b/focus_validator/config_objects/json_loader.py index 0c318bc..96d191c 100644 --- a/focus_validator/config_objects/json_loader.py +++ b/focus_validator/config_objects/json_loader.py @@ -129,14 +129,14 @@ def load_json_rules_with_dependencies_and_types( focus_dataset: Optional[str] = "", filter_rules: Optional[str] = None, applicability_criteria_list: Optional[List[str]] = None, - ) -> Tuple[ValidationPlan, Dict[str, str]]: + ) -> Tuple[ValidationPlan, Dict[str, str], Dict[str, Any]]: """ Load CR JSON, build the dependency graph with RuleDependencyResolver, select relevant rules, and return both an execution-ready ValidationPlan (parents preserved, topo-ordered nodes + layers) and a column type mapping. Returns: - Tuple of (ValidationPlan, Dict[column_name, pandas_dtype]) + Tuple of (ValidationPlan, Dict[column_name, pandas_dtype], Dict[model_data]) """ model_data = JsonLoader.load_json_rules(json_rule_file) @@ -227,4 +227,4 @@ def load_json_rules_with_dependencies_and_types( exec_ctx=None, # supply a runtime context later if you want gated edges ) - return val_plan, column_types + return val_plan, column_types, model_data diff --git a/focus_validator/rules/model-1.2.json b/focus_validator/rules/model-1.2.0.1.json similarity index 99% rename from focus_validator/rules/model-1.2.json rename to focus_validator/rules/model-1.2.0.1.json index c301ada..d086427 100644 --- a/focus_validator/rules/model-1.2.json +++ b/focus_validator/rules/model-1.2.0.1.json @@ -1,6 +1,6 @@ { "Details": { - "ModelVersion": "1.2", + "ModelVersion": "1.2.0.1", "FOCUSVersion": "1.2" }, "ApplicabilityCriteria": { @@ -9713,7 +9713,7 @@ }, "Condition": { "CheckFunction": "CheckNotValue", - "CheckCondition": "ResourceId", + "ColumnName": "ResourceId", "Value": null }, "Dependencies": [ @@ -12674,7 +12674,7 @@ "MustSatisfy": "CommitmentDiscountStatus MUST be null when CommitmentDiscountId is null.", "Keyword": "MUST", "Requirement": { - "CheckFunction": "CheckNotValue", + "CheckFunction": "CheckValue", "ColumnName": "CommitmentDiscountStatus", "Value": null }, diff --git a/focus_validator/rules/spec_rules.py b/focus_validator/rules/spec_rules.py index c14af87..3822105 100644 --- a/focus_validator/rules/spec_rules.py +++ b/focus_validator/rules/spec_rules.py @@ -55,8 +55,12 @@ def __init__( ): self.rule_set_path = rule_set_path self.rules_file_prefix = rules_file_prefix - self.rules_version = rules_version - self.model_version = "Unknown" # Will be loaded from JSON file + self.rules_version = ( + rules_version # Will be overridden by FOCUSVersion from JSON Details + ) + self.model_version = ( + "Unknown" # Will be loaded from ModelVersion in JSON Details + ) self.rules_file_suffix = rules_file_suffix self.focus_dataset = focus_dataset self.filter_rules = filter_rules @@ -82,9 +86,18 @@ def __init__( self.local_supported_versions, ) self.remote_versions = {} - if self.rules_block_remote_download and ( - self.rules_version not in self.local_supported_versions - ): + + # Build dict of local versions for semantic matching + local_versions_dict = { + v: {"source": "local"} for v in self.local_supported_versions + } + + # Try semantic version matching for local versions first + matched_version = self._find_best_version_match( + self.rules_version, local_versions_dict + ) + + if self.rules_block_remote_download and matched_version is None: self.log.error( "Version %s not found in local versions and remote download blocked", self.rules_version, @@ -92,14 +105,11 @@ def __init__( raise UnsupportedVersion( f"FOCUS version {self.rules_version} not supported. Supported versions: local {self.local_supported_versions}" ) - elif ( - self.rules_force_remote_download - or self.rules_version not in self.local_supported_versions - ): + elif self.rules_force_remote_download or matched_version is None: self.log.info( "Remote rule download needed (force: %s, version available locally: %s)", self.rules_force_remote_download, - self.rules_version in self.local_supported_versions, + matched_version is not None, ) self.log.debug("Fetching remote supported versions...") @@ -110,7 +120,12 @@ def __init__( self.remote_supported_versions, ) - if self.rules_version not in self.remote_supported_versions: + # Try semantic version matching for remote versions + matched_version = self._find_best_version_match( + self.rules_version, self.remote_versions + ) + + if matched_version is None: self.log.error( "Version %s not found in remote versions", self.rules_version ) @@ -119,13 +134,19 @@ def __init__( ) else: self.log.info( - "Downloading remote rules for version %s...", self.rules_version + "Matched requested version %s to %s from remote", + self.rules_version, + matched_version, ) - download_url = self.remote_versions[self.rules_version][ + download_url = self.remote_versions[matched_version][ "asset_browser_download_url" ] + filename = self.remote_versions[matched_version]["filename"] self.log.debug("Download URL: %s", download_url) + # Update json_rule_file path to use matched version filename + self.json_rule_file = os.path.join(self.rule_set_path, filename) + if not self.download_remote_version( remote_url=download_url, save_path=self.json_rule_file ): @@ -135,6 +156,18 @@ def __init__( ) else: self.log.info("Remote rules downloaded successfully") + else: + # Using local version + self.log.info( + "Matched requested version %s to %s from local files", + self.rules_version, + matched_version, + ) + # Update json_rule_file path to use matched version + self.json_rule_file = os.path.join( + self.rule_set_path, + f"{self.rules_file_prefix}{matched_version}{self.rules_file_suffix}", + ) self.rules = {} self.column_namespace = column_namespace self.json_rules = {} @@ -143,7 +176,12 @@ def __init__( self.column_types = {} def supported_local_versions(self) -> List[str]: - """Return list of versions from files in rule_set_path.""" + """Return list of highest versions from files in rule_set_path. + + Only returns the highest semantic version for each major.minor prefix. + For example, if both model-1.2.json and model-1.2.0.1.json exist, + only 1.2.0.1 will be returned. + """ versions = [] for filename in os.listdir(self.rule_set_path): if filename.startswith(self.rules_file_prefix) and filename.endswith( @@ -154,7 +192,96 @@ def supported_local_versions(self) -> List[str]: len(self.rules_file_prefix) : -len(self.rules_file_suffix) ] versions.append(version) - return versions + return self._filter_to_highest_versions(versions) + + def _parse_version_from_filename(self, filename: str) -> Optional[str]: + """Extract version from filename like 'model-1.2.0.1.json' -> '1.2.0.1'.""" + if not filename.startswith(self.rules_file_prefix) or not filename.endswith( + self.rules_file_suffix + ): + return None + version = filename[len(self.rules_file_prefix) : -len(self.rules_file_suffix)] + return version if version else None + + def _parse_version_tuple(self, version: str) -> Tuple[int, ...]: + """Convert version string '1.2.0.1' to tuple (1, 2, 0, 1) for comparison.""" + try: + return tuple(int(x) for x in version.split(".")) + except (ValueError, AttributeError): + self.log.warning( + "Malformed version string '%s' - cannot parse as semantic version. " + "Will sort to bottom. Check for corrupted model filenames.", + version, + ) + return (0,) # Fallback for invalid versions + + def _find_best_version_match( + self, requested: str, available: Dict[str, Dict[str, Any]] + ) -> Optional[str]: + """ + Find the best (highest) semantic version matching the requested prefix. + + Args: + requested: Version prefix like '1.2' or '1.3' + available: Dict of available versions with metadata + + Returns: + Best matching version string, or None if no match found + """ + # Filter versions that match the requested prefix + matching = [ + v + for v in available.keys() + if v.startswith(requested + ".") or v == requested + ] + + if not matching: + return None + + # Sort by semantic version (highest first) + matching.sort(key=self._parse_version_tuple, reverse=True) + return matching[0] + + def _filter_to_highest_versions(self, versions: List[str]) -> List[str]: + """ + Filter version list to only include the highest version for each major.minor prefix. + + For example, given ['1.2', '1.2.0', '1.2.0.1', '1.3', '1.3.0.1']: + Returns: ['1.2.0.1', '1.3.0.1'] + + This ensures the supported versions list only shows versions that would + actually be used (since semantic matching always picks the highest). + + Args: + versions: List of version strings + + Returns: + Filtered list with only highest version per major.minor + """ + # Group versions by major.minor prefix + prefix_groups: Dict[str, List[str]] = {} + for v in versions: + # Extract major.minor (first two components) + parts = v.split(".") + if len(parts) >= 2: + prefix = f"{parts[0]}.{parts[1]}" + else: + prefix = v + + if prefix not in prefix_groups: + prefix_groups[prefix] = [] + prefix_groups[prefix].append(v) + + # For each prefix, keep only the highest version + highest_versions = [] + for prefix, group_versions in prefix_groups.items(): + # Sort by semantic version and take the highest + group_versions.sort(key=self._parse_version_tuple, reverse=True) + highest_versions.append(group_versions[0]) + + # Return sorted by semantic version + highest_versions.sort(key=self._parse_version_tuple) + return highest_versions def find_release_assets( self, @@ -164,22 +291,29 @@ def find_release_assets( timeout: float = 15.0, ) -> Dict[str, Dict[str, Any]]: """ - Search GitHub releases for assets whose names start with - self.rules_files_prefix and end with self.rules_files_suffix. + Search GitHub releases for ALL model files across all releases. - Returns a dict of dicts: + Returns a dict keyed by model VERSION (not release tag): { - "version": { - "release_tag": "v1.2", - "asset_browser_download_url": "" + "1.2.0.1": { + "release_tag": "v1.3", + "filename": "model-1.2.0.1.json", + "asset_browser_download_url": "" + }, + "1.3": { + "release_tag": "v1.3", + "filename": "model-1.3.json", + "asset_browser_download_url": "" } } + + When multiple releases contain the same model version, the first (newest) + release wins, since GitHub API returns releases in reverse chronological order. """ session = requests.Session() headers = { "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", - # Optional but helps with routing "User-Agent": "focus-validator/asset-scan", } @@ -194,7 +328,6 @@ def find_release_assets( if resp.status_code == 401: raise PermissionError("Unauthorized (bad or missing token)") if resp.status_code == 403: - # Could be secondary rate limiting or scope issue raise RuntimeError(f"Forbidden / rate limited: {resp.text}") resp.raise_for_status() @@ -209,24 +342,41 @@ def find_release_assets( if not self.allow_prerelease_releases and rel.get("prerelease"): continue + release_tag = rel.get("tag_name", "") assets = rel.get("assets", []) or [] + + # Scan ALL model files in this release for asset in assets: - name = asset.get("name", "") - if name.startswith(self.rules_file_prefix) and name.endswith( - self.rules_file_suffix - ): - results[rel.get("tag_name", "").removeprefix("v")] = { - "release_tag": rel.get("tag_name"), + filename = asset.get("name", "") + model_version = self._parse_version_from_filename(filename) + + if model_version and model_version not in results: + # First match wins = newest release, since GitHub API + # returns releases newest-first + results[model_version] = { + "release_tag": release_tag, + "filename": filename, "asset_browser_download_url": asset.get( "browser_download_url" ), } + page += 1 + self.log.debug( + "Found %d model versions across releases: %s", + len(results), + list(results.keys()), + ) return results def supported_remote_versions(self) -> List[str]: - """Return list of versions from remote source.""" + """Return list of highest versions from remote source. + + Only returns the highest semantic version for each major.minor prefix. + For example, if both 1.2 and 1.2.0.1 are available remotely, + only 1.2.0.1 will be returned since that's what semantic matching would use. + """ # Respect block download setting if self.rules_block_remote_download: self.log.debug( @@ -236,7 +386,8 @@ def supported_remote_versions(self) -> List[str]: # Implement logic to fetch supported remote versions self.remote_versions = self.find_release_assets() - return [v for v in self.remote_versions.keys()] + all_versions = list(self.remote_versions.keys()) + return self._filter_to_highest_versions(all_versions) def download_remote_version(self, remote_url: str, save_path: str) -> bool: """Download the file from remote_url and save it to save_path. @@ -259,21 +410,34 @@ def load(self) -> None: self.load_rules() def load_rules(self) -> ValidationPlan: - val_plan, column_types = JsonLoader.load_json_rules_with_dependencies_and_types( - json_rule_file=self.json_rule_file, - focus_dataset=self.focus_dataset, - filter_rules=self.filter_rules, - applicability_criteria_list=self.applicability_criteria_list, + # Load rules and parse JSON once + val_plan, column_types, model_data = ( + JsonLoader.load_json_rules_with_dependencies_and_types( + json_rule_file=self.json_rule_file, + focus_dataset=self.focus_dataset, + filter_rules=self.filter_rules, + applicability_criteria_list=self.applicability_criteria_list, + ) ) - # Load model version from the JSON file Details section + # Extract FOCUS version and model version from Details (already parsed above) try: - model_data = JsonLoader.load_json_rules(self.json_rule_file) details = model_data.get("Details", {}) + # Override rules_version with FOCUSVersion from model file + focus_version = details.get("FOCUSVersion", None) + if focus_version: + self.rules_version = focus_version + self.log.debug("Loaded FOCUS version: %s", self.rules_version) + else: + self.log.warning( + "FOCUSVersion not found in Details, using requested version: %s", + self.rules_version, + ) + # Load model version self.model_version = details.get("ModelVersion", "Unknown") self.log.debug("Loaded model version: %s", self.model_version) except Exception as e: - self.log.warning("Failed to load model version: %s", e) + self.log.warning("Failed to load FOCUS/model version: %s", e) self.model_version = "Unknown" self.plan = val_plan diff --git a/pyproject.toml b/pyproject.toml index d0eeb4c..1008c62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "focus_validator" -version = "2.0.1" +version = "2.0.2" description = "FOCUS spec validator." authors = [] readme = "README.md" diff --git a/tests/test_spec_rules_unsupported_version.py b/tests/test_spec_rules_unsupported_version.py index be2a8a8..0130391 100644 --- a/tests/test_spec_rules_unsupported_version.py +++ b/tests/test_spec_rules_unsupported_version.py @@ -1,4 +1,5 @@ from unittest import TestCase +from unittest.mock import MagicMock from focus_validator.exceptions import UnsupportedVersion from focus_validator.rules.spec_rules import SpecRules @@ -21,3 +22,206 @@ def test_load_unsupported_version(self): column_namespace=None, ) self.assertIn("FOCUS version 0.1 not supported.", str(cm.exception)) + + +class TestSpecRulesVersionMatching(TestCase): + """Test version-matching helper methods.""" + + def setUp(self): + """Create a SpecRules instance for testing helper methods.""" + self.spec = SpecRules( + rule_set_path="focus_validator/rules", + rules_file_prefix="model-", + rules_version="1.2", # Use 1.2 which will match to available 1.2.0.1 + rules_file_suffix=".json", + focus_dataset="CostAndUsage", + filter_rules=None, + rules_force_remote_download=False, + rules_block_remote_download=True, + allow_draft_releases=False, + allow_prerelease_releases=False, + column_namespace=None, + ) + + def test_parse_version_from_filename_valid(self): + """Test extracting version from valid filenames.""" + test_cases = [ + ("model-1.2.json", "1.2"), + ("model-1.2.0.1.json", "1.2.0.1"), + ("model-2.0.0.json", "2.0.0"), + ] + for filename, expected in test_cases: + with self.subTest(filename=filename): + result = self.spec._parse_version_from_filename(filename) + self.assertEqual(result, expected) + + def test_parse_version_from_filename_invalid(self): + """Test that invalid filenames return None.""" + test_cases = [ + "requirements-1.3.json", # Wrong prefix + "model-1.3.txt", # Wrong suffix + "model-.json", # Empty version + "random.json", # No match + ] + for filename in test_cases: + with self.subTest(filename=filename): + result = self.spec._parse_version_from_filename(filename) + self.assertIsNone(result) + + def test_parse_version_tuple_valid(self): + """Test semantic version tuple parsing for valid versions.""" + test_cases = [ + ("1", (1,)), + ("1.2", (1, 2)), + ("1.2.0", (1, 2, 0)), + ("1.2.0.1", (1, 2, 0, 1)), + ("2.0.0.0", (2, 0, 0, 0)), + ] + for version, expected in test_cases: + with self.subTest(version=version): + result = self.spec._parse_version_tuple(version) + self.assertEqual(result, expected) + + def test_parse_version_tuple_comparison(self): + """Test that version tuples compare correctly.""" + versions = ["1.2", "1.2.0", "1.2.0.1", "1.3", "2.0"] + tuples = [self.spec._parse_version_tuple(v) for v in versions] + + # Verify they're in ascending order + for i in range(len(tuples) - 1): + with self.subTest(comparison=f"{versions[i]} < {versions[i + 1]}"): + self.assertLess(tuples[i], tuples[i + 1]) + + def test_parse_version_tuple_malformed_logs_warning(self): + """Test that malformed versions log warnings and return (0,).""" + # Mock the logger to capture warnings + self.spec.log = MagicMock() + + malformed_versions = [ + "1.2.abc", # Non-numeric component + "1.2.3.x", # Non-numeric component + "invalid", # Completely invalid + ] + + for version in malformed_versions: + with self.subTest(version=version): + result = self.spec._parse_version_tuple(version) + self.assertEqual(result, (0,)) + # Verify warning was logged + self.spec.log.warning.assert_called() + # Check warning message mentions the malformed version + call_args = str(self.spec.log.warning.call_args) + self.assertIn(version, call_args) + + def test_find_best_version_match_exact(self): + """Test finding exact version matches.""" + available = { + "1.2": {}, + "1.3": {}, + "2.0": {}, + } + + test_cases = [ + ("1.2", "1.2"), + ("1.3", "1.3"), + ("2.0", "2.0"), + ] + + for requested, expected in test_cases: + with self.subTest(requested=requested): + result = self.spec._find_best_version_match(requested, available) + self.assertEqual(result, expected) + + def test_find_best_version_match_prefix(self): + """Test finding highest version matching a prefix.""" + available = { + "1.2": {}, + "1.2.0": {}, + "1.2.0.1": {}, + "1.2.0.2": {}, + "1.3": {}, + } + + test_cases = [ + ("1.2", "1.2.0.2", "Should find highest 1.2.x version"), + ("1.2.0", "1.2.0.2", "Should find highest 1.2.0.x version"), + ("1.3", "1.3", "Should find exact match"), + ] + + for requested, expected, description in test_cases: + with self.subTest(description=description): + result = self.spec._find_best_version_match(requested, available) + self.assertEqual(result, expected) + + def test_find_best_version_match_ignores_higher_minor(self): + """Test that 1.2.0 request ignores 1.2.1 and higher.""" + available = { + "1.2.0": {}, + "1.2.0.1": {}, + "1.2.0.2": {}, + "1.2.1": {}, # Should be ignored + "1.2.1.1": {}, # Should be ignored + } + + result = self.spec._find_best_version_match("1.2.0", available) + self.assertEqual(result, "1.2.0.2") + + def test_find_best_version_match_short_prefix(self): + """Test matching with very short prefixes like '1'.""" + available = { + "1.0": {}, + "1.2": {}, + "1.2.0.1": {}, + "1.3": {}, + "2.0": {}, # Should be ignored + } + + result = self.spec._find_best_version_match("1", available) + self.assertEqual(result, "1.3") # Highest 1.x version + + def test_find_best_version_match_no_match(self): + """Test that non-existent versions return None.""" + available = { + "1.2": {}, + "1.3": {}, + } + + test_cases = [ + "2.0", # Major version doesn't exist + "1.4", # Minor version doesn't exist + "1.2.1", # Patch version doesn't exist + ] + + for requested in test_cases: + with self.subTest(requested=requested): + result = self.spec._find_best_version_match(requested, available) + self.assertIsNone(result) + + def test_filter_to_highest_versions_mixed(self): + """Test filtering to highest versions with mixed subversions.""" + versions = ["1.2", "1.2.0", "1.2.0.1", "1.3", "1.3.0.1"] + result = self.spec._filter_to_highest_versions(versions) + self.assertEqual(result, ["1.2.0.1", "1.3.0.1"]) + + def test_filter_to_highest_versions_no_subversions(self): + """Test filtering when no subversions exist.""" + versions = ["1.2", "1.3", "2.0"] + result = self.spec._filter_to_highest_versions(versions) + self.assertEqual(result, ["1.2", "1.3", "2.0"]) + + def test_filter_to_highest_versions_multiple_patches(self): + """Test filtering with multiple patch levels.""" + versions = ["1.2", "1.2.0", "1.2.0.1", "1.2.0.2", "1.2.1", "1.2.1.1"] + result = self.spec._filter_to_highest_versions(versions) + # Should keep highest for 1.2.0.x and highest for 1.2.1.x + # But since we group by major.minor only, should keep overall highest 1.2.x + # which is 1.2.1.1 + self.assertEqual(result, ["1.2.1.1"]) + + def test_filter_to_highest_versions_preserves_order(self): + """Test that filtered versions are sorted semantically.""" + versions = ["2.0", "1.3.0.1", "1.2.0.1", "1.2"] + result = self.spec._filter_to_highest_versions(versions) + # Should return semantically sorted + expected = sorted([v for v in result], key=self.spec._parse_version_tuple) + self.assertEqual(result, expected)