From 1508d74474e182994d3ab4e752d00f15bf71841f Mon Sep 17 00:00:00 2001 From: "henrydingliu@gmail.com" Date: Wed, 31 Dec 2025 02:32:08 +0000 Subject: [PATCH 1/5] Adding a test for drop_valuation --- chainladder/development/tests/test_barnzehn.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/chainladder/development/tests/test_barnzehn.py b/chainladder/development/tests/test_barnzehn.py index b056bd16..ed4e6e36 100644 --- a/chainladder/development/tests/test_barnzehn.py +++ b/chainladder/development/tests/test_barnzehn.py @@ -45,6 +45,21 @@ def origin_onehot(df,ori): == np.around(cl.BarnettZehnwirth(formula='C(origin)').fit_transform(abc).ldf_.values,3) ) +def test_drops(): + ''' + this function tests the passing in a basic drop_valuation + ''' + def test_func(df): + return df["development"] + + abc = cl.load_sample('abc') + test_dict = {'testfeat':{'func':test_func,'kwargs':{}}} + + assert np.all( + np.around(cl.BarnettZehnwirth(formula='C(development)',drop_valuation='1979').fit(abc).coef_.T.values,3) + == np.around(cl.BarnettZehnwirth(formula='C(testfeat)',drop = [('1977',36),('1978',24),('1979',12)],feat_eng = test_dict).fit(abc).coef_.T.values,3) + ) + def test_bz_2008(): ''' this function tests the drop parameter by recreating the example in the 2008 BZ paper, section 4.1 From 4f61b996e093b66ccd029a3ecf32c6aac28ac0c6 Mon Sep 17 00:00:00 2001 From: "henrydingliu@gmail.com" Date: Wed, 31 Dec 2025 18:51:33 +0000 Subject: [PATCH 2/5] More fixed for ml methods - separating weight flattening into a separate method from _prep_X_ml - adding sample weight support to glm - cleaning up how weights are handled in each ml method. 
- various fixes per #533 --- chainladder/development/barnzehn.py | 4 +-- chainladder/development/glm.py | 25 ++++++++++++------- chainladder/development/learning.py | 30 ++++++++++++++--------- chainladder/development/tests/test_glm.py | 2 +- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/chainladder/development/barnzehn.py b/chainladder/development/barnzehn.py index 7a2eb2e8..35d5c0e3 100644 --- a/chainladder/development/barnzehn.py +++ b/chainladder/development/barnzehn.py @@ -75,7 +75,7 @@ def fit(self, X, y=None, sample_weight=None): self.model_ = DevelopmentML(Pipeline(steps=[ ('design_matrix', PatsyFormula(self.formula)), ('model', LinearRegression(fit_intercept=False))]), - y_ml=response, fit_incrementals=False, feat_eng = self.feat_eng, drop=self.drop, drop_valuation = self.drop_valuation, weighted_step = 'model').fit(tri) + y_ml=response, fit_incrementals=True, feat_eng = self.feat_eng, drop=self.drop, drop_valuation = self.drop_valuation, weighted_step = 'model').fit(X = tri, sample_weight = sample_weight) resid = tri - self.model_.triangle_ml_[ self.model_.triangle_ml_.valuation <= tri.valuation_date] self.mse_resid_ = (resid**2).sum(0).sum(1).sum(2).sum() / ( @@ -100,7 +100,7 @@ def transform(self, X): X_new : New triangle with transformed attributes. 
""" X_new = X.copy() - X_ml, weight_ml = self.model_._prep_X_ml(X.cum_to_incr().log()) + X_ml = self.model_._prep_X_ml(X.cum_to_incr().log()) y_ml = self.model_.estimator_ml.predict(X_ml) triangle_ml, predicted_data = self.model_._get_triangle_ml(X_ml, y_ml) backend = "cupy" if X.array_backend == "cupy" else "numpy" diff --git a/chainladder/development/glm.py b/chainladder/development/glm.py index 9b121367..c44bb0d5 100644 --- a/chainladder/development/glm.py +++ b/chainladder/development/glm.py @@ -22,15 +22,16 @@ class TweedieGLM(DevelopmentBase): Parameters ---------- + drop: tuple or list of tuples + Drops specific origin/development combination(s) + drop_valuation: str or list of str (default = None) + Drops specific valuation periods. str must be date convertible. design_matrix: formula-like A patsy formula describing the independent variables, X of the GLM response: str Column name for the reponse variable of the GLM. If ommitted, then the first column of the Triangle will be used. - weight: str - Column name of any weight to use in the GLM. If none specified, then an - unweighted regression will be performed. - power: float, default=0 + power: float, default=1 The power determines the underlying target distribution according to the following table: +-------+------------------------+ @@ -52,7 +53,7 @@ class TweedieGLM(DevelopmentBase): regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). - link: {'auto', 'identity', 'log'}, default='auto' + link: {'auto', 'identity', 'log'}, default='log' The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets the link depending on the chosen family as follows: @@ -78,10 +79,11 @@ class TweedieGLM(DevelopmentBase): """ def __init__(self, design_matrix='C(development) + C(origin)', - response=None, weight=None, power=1.0, alpha=1.0, link='log', - max_iter=100, tol=0.0001, warm_start=False, verbose=0): + response=None, power=1.0, alpha=1.0, link='log', + max_iter=100, tol=0.0001, warm_start=False, verbose=0, drop=None,drop_valuation=None): + self.drop = drop + self.drop_valuation = drop_valuation self.response=response - self.weight=weight self.design_matrix = design_matrix self.power=power self.alpha=alpha @@ -93,13 +95,18 @@ def __init__(self, design_matrix='C(development) + C(origin)', def fit(self, X, y=None, sample_weight=None): response = X.columns[0] if not self.response else self.response + if sample_weight is None: + weight = None + else: + weight = 'model' self.model_ = DevelopmentML(Pipeline(steps=[ ('design_matrix', PatsyFormula(self.design_matrix)), ('model', TweedieRegressor( link=self.link, power=self.power, max_iter=self.max_iter, tol=self.tol, warm_start=self.warm_start, verbose=self.verbose, fit_intercept=False))]), - y_ml=response, weight_ml=self.weight).fit(X) + y_ml=response, weighted_step = weight, + drop=self.drop, drop_valuation=self.drop_valuation).fit(X = X, sample_weight = sample_weight) return self @property diff --git a/chainladder/development/learning.py b/chainladder/development/learning.py index 0df1092b..2a14a640 100644 --- a/chainladder/development/learning.py +++ b/chainladder/development/learning.py @@ -33,6 +33,8 @@ class DevelopmentML(DevelopmentBase): Time Series aspects of the model. Predictions from one development period get used as featues in the next development period. Lags should be negative integers. 
+ weighted_step: str + Step name within estimator_ml that is weighted drop: tuple or list of tuples Drops specific origin/development combination(s) drop_valuation: str or list of str (default = None) @@ -56,8 +58,7 @@ def test_func(df) return df['origin'] + 1 ) fit_incrementals: - Whether the response variable should be converted to an incremental basis - for fitting. + Whether the response variable should be converted to an incremental basis for fitting. Attributes ---------- @@ -70,10 +71,9 @@ def test_func(df) """ def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False, - weight_ml=None, weighted_step=None,drop=None,drop_valuation=None,fit_incrementals=True, feat_eng=None): + weighted_step=None,drop=None,drop_valuation=None,fit_incrementals=True, feat_eng=None): self.estimator_ml=estimator_ml self.y_ml=y_ml - self.weight_ml = weight_ml self.weighted_step = weighted_step self.autoregressive = autoregressive self.drop = drop @@ -168,7 +168,7 @@ def _prep_X_ml(self, X): df_base = X.incr_to_cum().to_frame( keepdims=True, implicit_axis=True, origin_as_datetime=True ).reset_index().iloc[:, :-1] - df = df_base.merge(X.cum_to_incr().to_frame( + df = df_base.merge(X_.to_frame( keepdims=True, implicit_axis=True, origin_as_datetime=True ).reset_index(), how='left', on=list(df_base.columns)).fillna(0) @@ -177,13 +177,18 @@ def _prep_X_ml(self, X): if self.feat_eng is not None: for key, item in self.feat_eng.items(): df[key] = item['func'](df=df,**item['kwargs']) + return df + + def _prep_w_ml(self,X,sample_weight=None): weight_base = (~np.isnan(X.values)).astype(float) - weight = weight_base.copy() + weight = weight_base.copy() if self.drop is not None: weight = weight * self._drop_func(X) if self.drop_valuation is not None: - weight = weight * self._drop_valuation_func(X) - return df, weight.flatten()[weight_base.flatten()>0] + weight = weight * self._drop_valuation_func(X) + if sample_weight is not None: + weight = weight * sample_weight.values + return 
weight.flatten()[weight_base.flatten()>0] def fit(self, X, y=None, sample_weight=None): """Fit the model with X. @@ -194,8 +199,8 @@ def fit(self, X, y=None, sample_weight=None): Set of LDFs to which the estimator will be applied. y : None Ignored, use y_ml to set a reponse variable for the ML algorithm - sample_weight : None - Ignored + sample_weight : Triangle-like + Weights to use in the regression Returns ------- @@ -214,8 +219,9 @@ def fit(self, X, y=None, sample_weight=None): self.valuation_encoder_ = dict(zip( val, (pd.Series(val).rank()-1)/{'Y':1, 'S': 2, 'Q':4, 'M': 12}[X.development_grain])) - df, weight = self._prep_X_ml(X) + df = self._prep_X_ml(X) self.df_ = df + weight = self._prep_w_ml(X,sample_weight) self.weight_ = weight if self.weighted_step == None: sample_weights = {} @@ -249,7 +255,7 @@ def transform(self, X): X_new : New triangle with transformed attributes. """ X_new = X.copy() - X_ml, weight_ml = self._prep_X_ml(X) + X_ml = self._prep_X_ml(X) y_ml=self.estimator_ml.predict(X_ml) triangle_ml, predicted_data = self._get_triangle_ml(X_ml, y_ml) backend = "cupy" if X.array_backend == "cupy" else "numpy" diff --git a/chainladder/development/tests/test_glm.py b/chainladder/development/tests/test_glm.py index 0065d93e..c2c876df 100644 --- a/chainladder/development/tests/test_glm.py +++ b/chainladder/development/tests/test_glm.py @@ -4,4 +4,4 @@ def test_basic_odp_cl(genins): assert abs( (cl.Chainladder().fit(genins).ultimate_ - cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) / - genins.latest_diagonal).max()< 1e-2 + genins.latest_diagonal).max()< 1e-2 \ No newline at end of file From d7e093b318a8342e0599259211a82fde4ceae3b2 Mon Sep 17 00:00:00 2001 From: "henrydingliu@gmail.com" Date: Wed, 31 Dec 2025 19:19:32 +0000 Subject: [PATCH 3/5] adding some tests for ml methods --- chainladder/development/tests/test_glm.py | 6 ++++++ chainladder/development/tests/test_learning.py | 11 +++++++++++ 2 files changed, 17 insertions(+) 
create mode 100644 chainladder/development/tests/test_learning.py diff --git a/chainladder/development/tests/test_glm.py b/chainladder/development/tests/test_glm.py index c2c876df..ce1a2e82 100644 --- a/chainladder/development/tests/test_glm.py +++ b/chainladder/development/tests/test_glm.py @@ -4,4 +4,10 @@ def test_basic_odp_cl(genins): assert abs( (cl.Chainladder().fit(genins).ultimate_ - cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) / + genins.latest_diagonal).max()< 1e-2 + +def test_sample_weight(genins): + assert abs( + (cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins,sample_weight=genins/genins)).ultimate_ - + cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) / genins.latest_diagonal).max()< 1e-2 \ No newline at end of file diff --git a/chainladder/development/tests/test_learning.py b/chainladder/development/tests/test_learning.py new file mode 100644 index 00000000..a6f02249 --- /dev/null +++ b/chainladder/development/tests/test_learning.py @@ -0,0 +1,11 @@ +import chainladder as cl +from sklearn.linear_model import LinearRegression +from sklearn.pipeline import Pipeline +from chainladder.utils.utility_functions import PatsyFormula + +def test_basic_odp_cl(genins): + model = cl.DevelopmentML(Pipeline(steps=[ + ('design_matrix', PatsyFormula('C(development)')), + ('model', LinearRegression(fit_intercept=False))]), + y_ml=response,fit_incrementals=False).fit(genins) + assert abs(model.triangle_ml_.loc[:,:,'2010',:] - genins.mean()).max() < 1e2 \ No newline at end of file From 02a088ff61f003cde785d55448a6654eec2f95f8 Mon Sep 17 00:00:00 2001 From: "henrydingliu@gmail.com" Date: Wed, 31 Dec 2025 19:35:33 +0000 Subject: [PATCH 4/5] fixing tests --- chainladder/development/tests/test_glm.py | 4 ++-- chainladder/development/tests/test_learning.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/chainladder/development/tests/test_glm.py b/chainladder/development/tests/test_glm.py index 
ce1a2e82..34532871 100644 --- a/chainladder/development/tests/test_glm.py +++ b/chainladder/development/tests/test_glm.py @@ -8,6 +8,6 @@ def test_basic_odp_cl(genins): def test_sample_weight(genins): assert abs( - (cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins,sample_weight=genins/genins)).ultimate_ - - cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) / + (cl.Chainladder().fit(genins).ultimate_ - + cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins,sample_weight=genins/genins)).ultimate_) / genins.latest_diagonal).max()< 1e-2 \ No newline at end of file diff --git a/chainladder/development/tests/test_learning.py b/chainladder/development/tests/test_learning.py index a6f02249..f9d93e9d 100644 --- a/chainladder/development/tests/test_learning.py +++ b/chainladder/development/tests/test_learning.py @@ -3,7 +3,8 @@ from sklearn.pipeline import Pipeline from chainladder.utils.utility_functions import PatsyFormula -def test_basic_odp_cl(genins): +def test_incremental(genins): + response = [genins.columns[0]] model = cl.DevelopmentML(Pipeline(steps=[ ('design_matrix', PatsyFormula('C(development)')), ('model', LinearRegression(fit_intercept=False))]), From 3c185b15ca1e5f6eff55b4ff996d32b1ce5d96dc Mon Sep 17 00:00:00 2001 From: "henrydingliu@gmail.com" Date: Wed, 31 Dec 2025 20:05:57 +0000 Subject: [PATCH 5/5] more ml tests --- chainladder/development/tests/test_barnzehn.py | 10 +++------- chainladder/development/tests/test_learning.py | 7 +++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/chainladder/development/tests/test_barnzehn.py b/chainladder/development/tests/test_barnzehn.py index ed4e6e36..d9841e11 100644 --- a/chainladder/development/tests/test_barnzehn.py +++ b/chainladder/development/tests/test_barnzehn.py @@ -1,9 +1,9 @@ import numpy as np import chainladder as cl import pytest +abc = cl.load_sample('abc') def test_basic_bz(): - abc = cl.load_sample('abc') assert np.all( 
np.around(cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(abc).coef_.T.values,3).flatten() == np.array([11.837,0.179,0.345,0.378,0.405,0.427,0.431,0.66,0.963,1.157,1.278,0.251,-0.056,-0.449,-0.829,-1.169,-1.508,-1.798,-2.023,-2.238,-2.428]) @@ -21,7 +21,6 @@ def test_feat_eng_1(): def test_func(df): return df["development"] - abc = cl.load_sample('abc') test_dict = {'testfeat':{'func':test_func,'kwargs':{}}} assert np.all( @@ -38,7 +37,6 @@ def test_feat_eng_2(): def origin_onehot(df,ori): return [1 if x == ori else 0 for x in df["origin"]] - abc = cl.load_sample('abc') feat_dict = {f'origin_{x}':{'func':origin_onehot,'kwargs':{'ori':float(x+1)}} for x in range(10)} assert np.all( np.around(cl.BarnettZehnwirth(formula='+'.join([f'C({x})' for x in feat_dict.keys()]),feat_eng = feat_dict).fit(abc).ldf_.values,3) @@ -52,19 +50,17 @@ def test_drops(): def test_func(df): return df["development"] - abc = cl.load_sample('abc') test_dict = {'testfeat':{'func':test_func,'kwargs':{}}} assert np.all( - np.around(cl.BarnettZehnwirth(formula='C(development)',drop_valuation='1979').fit(abc).coef_.T.values,3) - == np.around(cl.BarnettZehnwirth(formula='C(testfeat)',drop = [('1977',36),('1978',24),('1979',12)],feat_eng = test_dict).fit(abc).coef_.T.values,3) + np.around(cl.BarnettZehnwirth(formula='C(development)',drop_valuation='1979').fit(abc).triangle_ml_.values,3) + == np.around(cl.BarnettZehnwirth(formula='C(testfeat)',drop = [('1977',36),('1978',24),('1979',12)],feat_eng = test_dict).fit(abc).triangle_ml_.values,3) ) def test_bz_2008(): ''' this function tests the drop parameter by recreating the example in the 2008 BZ paper, section 4.1 ''' - abc = cl.load_sample('abc') exposure=np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]]) abc_adj = abc/exposure diff --git a/chainladder/development/tests/test_learning.py b/chainladder/development/tests/test_learning.py index f9d93e9d..0b411aca 100644 --- 
a/chainladder/development/tests/test_learning.py +++ b/chainladder/development/tests/test_learning.py @@ -9,4 +9,11 @@ def test_incremental(genins): ('design_matrix', PatsyFormula('C(development)')), ('model', LinearRegression(fit_intercept=False))]), y_ml=response,fit_incrementals=False).fit(genins) + assert abs(model.triangle_ml_.loc[:,:,'2010',:] - genins.mean()).max() < 1e2 + +def test_misc(genins): + model = cl.DevelopmentML(Pipeline(steps=[ + ('design_matrix', PatsyFormula('C(development)')), + ('model', LinearRegression(fit_intercept=False))]), + weighted_step = ['model'], fit_incrementals=False).fit(genins, sample_weight=genins/genins) assert abs(model.triangle_ml_.loc[:,:,'2010',:] - genins.mean()).max() < 1e2 \ No newline at end of file