From 6b8399c3059baa656804b6d2299042682d453be1 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho
Date: Thu, 5 Mar 2026 15:05:02 -0800
Subject: [PATCH 1/3] Expose num_features as a property

---
 python/nvforest/nvforest/_base.py             |  5 +++++
 python/nvforest/nvforest/_forest_inference.py | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/python/nvforest/nvforest/_base.py b/python/nvforest/nvforest/_base.py
index ebc363c..2e635d3 100644
--- a/python/nvforest/nvforest/_base.py
+++ b/python/nvforest/nvforest/_base.py
@@ -90,6 +90,11 @@ def apply(
         """
         pass
 
+    @property
+    @abstractmethod
+    def num_features(self) -> int:
+        pass
+
     @property
     @abstractmethod
     def num_outputs(self) -> int:
diff --git a/python/nvforest/nvforest/_forest_inference.py b/python/nvforest/nvforest/_forest_inference.py
index 4637c92..2c3c933 100644
--- a/python/nvforest/nvforest/_forest_inference.py
+++ b/python/nvforest/nvforest/_forest_inference.py
@@ -387,6 +387,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -488,6 +492,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -603,6 +611,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -706,6 +718,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
From 47025ff2a64e6d5d333818a959b333eb81240047 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho
Date: Thu, 5 Mar 2026 15:28:04 -0800
Subject: [PATCH 2/3] Check for the size of input

---
 .../nvforest/nvforest/detail/forest_inference.pyx | 12 ++++++++++++
 python/nvforest/tests/test_nvforest.py            | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/python/nvforest/nvforest/detail/forest_inference.pyx b/python/nvforest/nvforest/detail/forest_inference.pyx
index 5496aa3..0340c59 100644
--- a/python/nvforest/nvforest/detail/forest_inference.pyx
+++ b/python/nvforest/nvforest/detail/forest_inference.pyx
@@ -355,12 +355,22 @@ class ForestInferenceImpl:
     def elem_postprocessing(self) -> str:
         return self.impl.elem_postprocessing()
 
+    def _validate_input_dims(self, X: DataType) -> None:
+        if len(X.shape) != 2:
+            raise ValueError("Expected a 2D array for X")
+        if X.shape[1] != self.num_features:
+            raise ValueError(
+                f"Expected {self.num_features} features in the input "
+                f"but X has {X.shape[1]} features"
+            )
+
     def predict(
         self,
         X: DataType,
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         # Returns probabilities if the model is a classifier
         return self.impl.predict(
             X, chunk_size=(chunk_size or self.default_chunk_size)
@@ -372,6 +382,7 @@ class ForestInferenceImpl:
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         chunk_size = (chunk_size or self.default_chunk_size)
         return self.impl.predict(
             X, predict_type="per_tree", chunk_size=chunk_size
@@ -383,6 +394,7 @@ class ForestInferenceImpl:
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         chunk_size = (chunk_size or self.default_chunk_size)
         return self.impl.predict(
             X, predict_type="leaf_id", chunk_size=chunk_size
diff --git a/python/nvforest/tests/test_nvforest.py b/python/nvforest/tests/test_nvforest.py
index 9150482..e5042d2 100644
--- a/python/nvforest/tests/test_nvforest.py
+++ b/python/nvforest/tests/test_nvforest.py
@@ -856,3 +856,18 @@ def test_wide_data():
     # Inference should run without crashing
     fm = nvforest.load_from_sklearn(clf)
     _ = fm.predict(X)
+
+
+def test_incorrect_data_shape():
+    n_rows = 50
+    n_features = 5
+    X = np.random.normal(size=(n_rows, n_features)).astype(np.float32)
+    y = np.asarray([0, 1] * (n_rows // 2), dtype=np.int32)
+
+    clf = RandomForestClassifier(max_features="sqrt", n_estimators=10)
+    clf.fit(X, y)
+
+    fm = nvforest.load_from_sklearn(clf)
+    assert fm.num_features == n_features
+    with pytest.raises(ValueError, match=f"Expected {n_features} features"):
+        fm.predict(np.zeros((1, 4)))
From 1c077cf3e8db915450bec57f34196c44021b1579 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho
Date: Thu, 5 Mar 2026 15:49:07 -0800
Subject: [PATCH 3/3] More test coverage

---
 python/nvforest/tests/test_nvforest.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/python/nvforest/tests/test_nvforest.py b/python/nvforest/tests/test_nvforest.py
index e5042d2..9aedade 100644
--- a/python/nvforest/tests/test_nvforest.py
+++ b/python/nvforest/tests/test_nvforest.py
@@ -858,7 +858,17 @@ def test_wide_data():
     _ = fm.predict(X)
 
 
-def test_incorrect_data_shape():
+@pytest.mark.parametrize("input_size", [4, 6], ids=["too_narrow", "too_wide"])
+@pytest.mark.parametrize(
+    "predict_func",
+    [
+        nvforest.CPUForestInferenceClassifier.predict,
+        nvforest.CPUForestInferenceClassifier.predict_per_tree,
+        nvforest.CPUForestInferenceClassifier.apply,
+    ],
+    ids=["predict", "predict_per_tree", "apply"],
+)
+def test_incorrect_data_shape(input_size, predict_func):
     n_rows = 50
     n_features = 5
     X = np.random.normal(size=(n_rows, n_features)).astype(np.float32)
@@ -867,7 +877,8 @@ def test_incorrect_data_shape():
     clf = RandomForestClassifier(max_features="sqrt", n_estimators=10)
     clf.fit(X, y)
 
-    fm = nvforest.load_from_sklearn(clf)
+    fm = nvforest.load_from_sklearn(clf, device="cpu")
     assert fm.num_features == n_features
     with pytest.raises(ValueError, match=f"Expected {n_features} features"):
-        fm.predict(np.zeros((1, 4)))
+        X_test = np.zeros((1, input_size))
+        _ = predict_func(fm, X_test)