From 6b8399c3059baa656804b6d2299042682d453be1 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho <phcho@nvidia.com>
Date: Thu, 5 Mar 2026 15:05:02 -0800
Subject: [PATCH 1/3] Expose num_features as a property

---
 python/nvforest/nvforest/_base.py             |  5 +++++
 python/nvforest/nvforest/_forest_inference.py | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/python/nvforest/nvforest/_base.py b/python/nvforest/nvforest/_base.py
index ebc363c..2e635d3 100644
--- a/python/nvforest/nvforest/_base.py
+++ b/python/nvforest/nvforest/_base.py
@@ -90,6 +90,11 @@ def apply(
         """
         pass
 
+    @property
+    @abstractmethod
+    def num_features(self) -> int:
+        """Number of feature columns the model expects in its input."""
+
     @property
     @abstractmethod
     def num_outputs(self) -> int:
diff --git a/python/nvforest/nvforest/_forest_inference.py b/python/nvforest/nvforest/_forest_inference.py
index 4637c92..2c3c933 100644
--- a/python/nvforest/nvforest/_forest_inference.py
+++ b/python/nvforest/nvforest/_forest_inference.py
@@ -387,6 +387,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features  # expected input column count
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -488,6 +492,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features  # expected input column count
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -603,6 +611,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features  # expected input column count
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs
@@ -706,6 +718,10 @@ def apply(
     ) -> DataType:
         return self.forest.apply(X, chunk_size=chunk_size)
 
+    @property
+    def num_features(self) -> int:
+        return self.forest.num_features  # expected input column count
+
     @property
     def num_outputs(self) -> int:
         return self.forest.num_outputs

From 47025ff2a64e6d5d333818a959b333eb81240047 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho <phcho@nvidia.com>
Date: Thu, 5 Mar 2026 15:28:04 -0800
Subject: [PATCH 2/3] Validate input dimensions before inference

---
 .../nvforest/nvforest/detail/forest_inference.pyx | 12 ++++++++++++
 python/nvforest/tests/test_nvforest.py            | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/python/nvforest/nvforest/detail/forest_inference.pyx b/python/nvforest/nvforest/detail/forest_inference.pyx
index 5496aa3..0340c59 100644
--- a/python/nvforest/nvforest/detail/forest_inference.pyx
+++ b/python/nvforest/nvforest/detail/forest_inference.pyx
@@ -355,12 +355,22 @@ class ForestInferenceImpl:
     def elem_postprocessing(self) -> str:
         return self.impl.elem_postprocessing()
 
+    def _validate_input_dims(self, X: DataType) -> None:
+        """Raise ValueError unless X is 2D with num_features columns."""
+        if len(X.shape) != 2:
+            raise ValueError("Expected a 2D array for X")
+        n_cols = X.shape[1]
+        if n_cols != self.num_features:
+            msg = f"Expected {self.num_features} features in the input "
+            raise ValueError(msg + f"but X has {n_cols} features")
+
     def predict(
         self,
         X: DataType,
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         # Returns probabilities if the model is a classifier
         return self.impl.predict(
             X, chunk_size=(chunk_size or self.default_chunk_size)
@@ -372,6 +382,7 @@ class ForestInferenceImpl:
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         chunk_size = (chunk_size or self.default_chunk_size)
         return self.impl.predict(
             X, predict_type="per_tree", chunk_size=chunk_size
@@ -383,6 +394,7 @@ class ForestInferenceImpl:
         *,
         chunk_size: Optional[int] = None,
     ) -> DataType:
+        self._validate_input_dims(X)
         chunk_size = (chunk_size or self.default_chunk_size)
         return self.impl.predict(
             X, predict_type="leaf_id", chunk_size=chunk_size
diff --git a/python/nvforest/tests/test_nvforest.py b/python/nvforest/tests/test_nvforest.py
index 9150482..e5042d2 100644
--- a/python/nvforest/tests/test_nvforest.py
+++ b/python/nvforest/tests/test_nvforest.py
@@ -856,3 +856,18 @@ def test_wide_data():
     # Inference should run without crashing
     fm = nvforest.load_from_sklearn(clf)
     _ = fm.predict(X)
+
+
+def test_incorrect_data_shape():
+    n_rows = 50
+    n_features = 5
+    X = np.random.normal(size=(n_rows, n_features)).astype(np.float32)
+    y = np.asarray([0, 1] * (n_rows // 2), dtype=np.int32)
+
+    clf = RandomForestClassifier(max_features="sqrt", n_estimators=10)
+    clf.fit(X, y)
+
+    fm = nvforest.load_from_sklearn(clf)
+    assert fm.num_features == n_features
+    with pytest.raises(ValueError, match=f"Expected {n_features} features"):
+        fm.predict(np.zeros((1, 4)))

From 1c077cf3e8db915450bec57f34196c44021b1579 Mon Sep 17 00:00:00 2001
From: Hyunsu Cho <phcho@nvidia.com>
Date: Thu, 5 Mar 2026 15:49:07 -0800
Subject: [PATCH 3/3] Test narrow and wide inputs for predict, predict_per_tree, apply

---
 python/nvforest/tests/test_nvforest.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/python/nvforest/tests/test_nvforest.py b/python/nvforest/tests/test_nvforest.py
index e5042d2..9aedade 100644
--- a/python/nvforest/tests/test_nvforest.py
+++ b/python/nvforest/tests/test_nvforest.py
@@ -858,7 +858,17 @@ def test_wide_data():
     _ = fm.predict(X)
 
 
-def test_incorrect_data_shape():
+@pytest.mark.parametrize("input_size", [4, 6], ids=["too_narrow", "too_wide"])
+@pytest.mark.parametrize(
+    "predict_func",
+    [
+        nvforest.CPUForestInferenceClassifier.predict,
+        nvforest.CPUForestInferenceClassifier.predict_per_tree,
+        nvforest.CPUForestInferenceClassifier.apply,
+    ],
+    ids=["predict", "predict_per_tree", "apply"],
+)
+def test_incorrect_data_shape(input_size, predict_func):
     n_rows = 50
     n_features = 5
     X = np.random.normal(size=(n_rows, n_features)).astype(np.float32)
@@ -867,7 +877,8 @@ def test_incorrect_data_shape():
     clf = RandomForestClassifier(max_features="sqrt", n_estimators=10)
     clf.fit(X, y)
 
-    fm = nvforest.load_from_sklearn(clf)
+    fm = nvforest.load_from_sklearn(clf, device="cpu")
     assert fm.num_features == n_features
+    X_test = np.zeros((1, input_size))
     with pytest.raises(ValueError, match=f"Expected {n_features} features"):
-        fm.predict(np.zeros((1, 4)))
+        _ = predict_func(fm, X_test)