4 changes: 2 additions & 2 deletions chainladder/development/barnzehn.py
@@ -75,7 +75,7 @@ def fit(self, X, y=None, sample_weight=None):
        self.model_ = DevelopmentML(Pipeline(steps=[
            ('design_matrix', PatsyFormula(self.formula)),
            ('model', LinearRegression(fit_intercept=False))]),
-           y_ml=response, fit_incrementals=False, feat_eng = self.feat_eng, drop=self.drop, drop_valuation = self.drop_valuation, weighted_step = 'model').fit(tri)
+           y_ml=response, fit_incrementals=True, feat_eng = self.feat_eng, drop=self.drop, drop_valuation = self.drop_valuation, weighted_step = 'model').fit(X = tri, sample_weight = sample_weight)
        resid = tri - self.model_.triangle_ml_[
            self.model_.triangle_ml_.valuation <= tri.valuation_date]
        self.mse_resid_ = (resid**2).sum(0).sum(1).sum(2).sum() / (
@@ -100,7 +100,7 @@ def transform(self, X):
        X_new : New triangle with transformed attributes.
        """
        X_new = X.copy()
-       X_ml, weight_ml = self.model_._prep_X_ml(X.cum_to_incr().log())
+       X_ml = self.model_._prep_X_ml(X.cum_to_incr().log())
        y_ml = self.model_.estimator_ml.predict(X_ml)
        triangle_ml, predicted_data = self.model_._get_triangle_ml(X_ml, y_ml)
        backend = "cupy" if X.array_backend == "cupy" else "numpy"
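Note on the barnzehn.py change: BarnettZehnwirth.fit now sets fit_incrementals=True and forwards sample_weight through to DevelopmentML.fit instead of fitting without it. A minimal sketch of the new call pattern, assuming a weight Triangle shaped like the data (the unit-weight triangle below is illustrative and simply reproduces the unweighted fit):

    import chainladder as cl

    abc = cl.load_sample('abc')
    # One weight per observed cell; any positive Triangle of the same shape works.
    weights = abc / abc
    model = cl.BarnettZehnwirth(formula='C(origin) + C(development)')
    model.fit(abc, sample_weight=weights)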
25 changes: 16 additions & 9 deletions chainladder/development/glm.py
@@ -22,15 +22,16 @@ class TweedieGLM(DevelopmentBase):

    Parameters
    ----------
+   drop: tuple or list of tuples
+       Drops specific origin/development combination(s)
+   drop_valuation: str or list of str (default = None)
+       Drops specific valuation periods. str must be date convertible.
    design_matrix: formula-like
        A patsy formula describing the independent variables, X of the GLM
    response: str
        Column name for the response variable of the GLM. If omitted, then the
        first column of the Triangle will be used.
-   weight: str
-       Column name of any weight to use in the GLM. If none specified, then an
-       unweighted regression will be performed.
-   power: float, default=0
+   power: float, default=1
        The power determines the underlying target distribution according
        to the following table:
        +-------+------------------------+
@@ -52,7 +53,7 @@ class TweedieGLM(DevelopmentBase):
        regularization strength. ``alpha = 0`` is equivalent to unpenalized
        GLMs. In this case, the design matrix `X` must have full column rank
        (no collinearities).
-   link: {'auto', 'identity', 'log'}, default='auto'
+   link: {'auto', 'identity', 'log'}, default='log'
        The link function of the GLM, i.e. mapping from linear predictor
        `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
        the link depending on the chosen family as follows:
@@ -78,10 +79,11 @@
"""

def __init__(self, design_matrix='C(development) + C(origin)',
response=None, weight=None, power=1.0, alpha=1.0, link='log',
max_iter=100, tol=0.0001, warm_start=False, verbose=0):
response=None, power=1.0, alpha=1.0, link='log',
max_iter=100, tol=0.0001, warm_start=False, verbose=0, drop=None,drop_valuation=None):
self.drop = drop
self.drop_valuation = drop_valuation
self.response=response
self.weight=weight
self.design_matrix = design_matrix
self.power=power
self.alpha=alpha
@@ -93,13 +95,18 @@ def __init__(self, design_matrix='C(development) + C(origin)',

    def fit(self, X, y=None, sample_weight=None):
        response = X.columns[0] if not self.response else self.response
+       if sample_weight is None:
+           weight = None
+       else:
+           weight = 'model'
        self.model_ = DevelopmentML(Pipeline(steps=[
            ('design_matrix', PatsyFormula(self.design_matrix)),
            ('model', TweedieRegressor(
                link=self.link, power=self.power, max_iter=self.max_iter,
                tol=self.tol, warm_start=self.warm_start,
                verbose=self.verbose, fit_intercept=False))]),
-           y_ml=response, weight_ml=self.weight).fit(X)
+           y_ml=response, weighted_step = weight,
+           drop=self.drop, drop_valuation=self.drop_valuation).fit(X = X, sample_weight = sample_weight)
        return self

    @property
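Note on the glm.py change: TweedieGLM drops its old weight column parameter; weighting now comes in through fit's sample_weight, which flags the internal pipeline's 'model' step as the weighted step, and the new drop/drop_valuation arguments are passed straight through to DevelopmentML. A hedged sketch of the new interface (the drop_valuation value here is illustrative, not from the PR):

    import chainladder as cl

    genins = cl.load_sample('genins')
    # Unit weights, shaped like the data; any positive Triangle could be used.
    weights = genins / genins
    glm = cl.TweedieGLM(design_matrix='C(development) + C(origin)',
                        power=1.0, link='log',
                        drop_valuation='2005')  # drop one diagonal from the fit
    glm.fit(genins, sample_weight=weights)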
30 changes: 18 additions & 12 deletions chainladder/development/learning.py
@@ -33,6 +33,8 @@ class DevelopmentML(DevelopmentBase):
    Time Series aspects of the model. Predictions from one development period
    get used as features in the next development period. Lags should be negative
    integers.
+   weighted_step: str
+       Step name within estimator_ml that is weighted
    drop: tuple or list of tuples
        Drops specific origin/development combination(s)
    drop_valuation: str or list of str (default = None)
@@ -56,8 +58,7 @@ def test_func(df)
            return df['origin'] + 1
        )
    fit_incrementals:
-       Whether the response variable should be converted to an incremental basis
-       for fitting.
+       Whether the response variable should be converted to an incremental basis for fitting.

    Attributes
    ----------
@@ -70,10 +71,9 @@
"""

def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False,
weight_ml=None, weighted_step=None,drop=None,drop_valuation=None,fit_incrementals=True, feat_eng=None):
weighted_step=None,drop=None,drop_valuation=None,fit_incrementals=True, feat_eng=None):
self.estimator_ml=estimator_ml
self.y_ml=y_ml
self.weight_ml = weight_ml
self.weighted_step = weighted_step
self.autoregressive = autoregressive
self.drop = drop
@@ -168,7 +168,7 @@ def _prep_X_ml(self, X):
        df_base = X.incr_to_cum().to_frame(
            keepdims=True, implicit_axis=True, origin_as_datetime=True
        ).reset_index().iloc[:, :-1]
-       df = df_base.merge(X.cum_to_incr().to_frame(
+       df = df_base.merge(X_.to_frame(
            keepdims=True, implicit_axis=True, origin_as_datetime=True
        ).reset_index(), how='left',
            on=list(df_base.columns)).fillna(0)
@@ -177,13 +177,18 @@
        if self.feat_eng is not None:
            for key, item in self.feat_eng.items():
                df[key] = item['func'](df=df,**item['kwargs'])
+       return df
+
+   def _prep_w_ml(self,X,sample_weight=None):
        weight_base = (~np.isnan(X.values)).astype(float)
        weight = weight_base.copy()
        if self.drop is not None:
            weight = weight * self._drop_func(X)
        if self.drop_valuation is not None:
            weight = weight * self._drop_valuation_func(X)
-       return df, weight.flatten()[weight_base.flatten()>0]
+       if sample_weight is not None:
+           weight = weight * sample_weight.values
+       return weight.flatten()[weight_base.flatten()>0]

    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.
@@ -194,8 +199,8 @@
            Set of LDFs to which the estimator will be applied.
        y : None
            Ignored, use y_ml to set a response variable for the ML algorithm
-       sample_weight : None
-           Ignored
+       sample_weight : Triangle-like
+           Weights to use in the regression

        Returns
        -------
@@ -214,8 +219,9 @@ def fit(self, X, y=None, sample_weight=None):
        self.valuation_encoder_ = dict(zip(
            val,
            (pd.Series(val).rank()-1)/{'Y':1, 'S': 2, 'Q':4, 'M': 12}[X.development_grain]))
-       df, weight = self._prep_X_ml(X)
+       df = self._prep_X_ml(X)
        self.df_ = df
+       weight = self._prep_w_ml(X,sample_weight)
        self.weight_ = weight
        if self.weighted_step == None:
            sample_weights = {}
@@ -249,7 +255,7 @@ def transform(self, X):
        X_new : New triangle with transformed attributes.
        """
        X_new = X.copy()
-       X_ml, weight_ml = self._prep_X_ml(X)
+       X_ml = self._prep_X_ml(X)
        y_ml=self.estimator_ml.predict(X_ml)
        triangle_ml, predicted_data = self._get_triangle_ml(X_ml, y_ml)
        backend = "cupy" if X.array_backend == "cupy" else "numpy"
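Note on the learning.py change: the weight logic moves out of _prep_X_ml into the new _prep_w_ml, which starts from the mask of observed (non-NaN) cells, multiplies in any drop/drop_valuation masks plus the user's sample_weight values, and returns only the entries for observed cells. A standalone numpy illustration of that composition (not the library code itself; the drop mask below is made up):

    import numpy as np

    values = np.array([[1.0, 2.0, np.nan],
                       [3.0, np.nan, np.nan]])
    weight_base = (~np.isnan(values)).astype(float)   # 1 where a cell is observed
    drop_mask = np.array([[1.0, 0.0, 1.0],            # e.g. one dropped cell
                          [1.0, 1.0, 1.0]])
    sample_weight = np.full(values.shape, 0.5)        # user-supplied weights
    weight = weight_base * drop_mask * sample_weight
    # Keep only the weights belonging to observed cells:
    print(weight.flatten()[weight_base.flatten() > 0])  # -> [0.5 0.  0.5]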
19 changes: 15 additions & 4 deletions chainladder/development/tests/test_barnzehn.py
@@ -1,9 +1,9 @@
import numpy as np
import chainladder as cl
import pytest
+abc = cl.load_sample('abc')

def test_basic_bz():
-    abc = cl.load_sample('abc')
    assert np.all(
        np.around(cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(abc).coef_.T.values,3).flatten()
        == np.array([11.837,0.179,0.345,0.378,0.405,0.427,0.431,0.66,0.963,1.157,1.278,0.251,-0.056,-0.449,-0.829,-1.169,-1.508,-1.798,-2.023,-2.238,-2.428])
@@ -21,7 +21,6 @@ def test_feat_eng_1():
    def test_func(df):
        return df["development"]

-    abc = cl.load_sample('abc')
    test_dict = {'testfeat':{'func':test_func,'kwargs':{}}}

    assert np.all(
@@ -38,18 +37,30 @@ def test_feat_eng_2():
    def origin_onehot(df,ori):
        return [1 if x == ori else 0 for x in df["origin"]]

-    abc = cl.load_sample('abc')
    feat_dict = {f'origin_{x}':{'func':origin_onehot,'kwargs':{'ori':float(x+1)}} for x in range(10)}
    assert np.all(
        np.around(cl.BarnettZehnwirth(formula='+'.join([f'C({x})' for x in feat_dict.keys()]),feat_eng = feat_dict).fit(abc).ldf_.values,3)
        == np.around(cl.BarnettZehnwirth(formula='C(origin)').fit_transform(abc).ldf_.values,3)
    )

+def test_drops():
+    '''
+    this function tests passing in a basic drop_valuation
+    '''
+    def test_func(df):
+        return df["development"]
+
+    test_dict = {'testfeat':{'func':test_func,'kwargs':{}}}
+
+    assert np.all(
+        np.around(cl.BarnettZehnwirth(formula='C(development)',drop_valuation='1979').fit(abc).triangle_ml_.values,3)
+        == np.around(cl.BarnettZehnwirth(formula='C(testfeat)',drop = [('1977',36),('1978',24),('1979',12)],feat_eng = test_dict).fit(abc).triangle_ml_.values,3)
+    )

def test_bz_2008():
    '''
    this function tests the drop parameter by recreating the example in the 2008 BZ paper, section 4.1
    '''
-    abc = cl.load_sample('abc')
    exposure=np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]])
    abc_adj = abc/exposure

6 changes: 6 additions & 0 deletions chainladder/development/tests/test_glm.py
@@ -5,3 +5,9 @@ def test_basic_odp_cl(genins):
        (cl.Chainladder().fit(genins).ultimate_ -
         cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) /
        genins.latest_diagonal).max() < 1e-2
+
+def test_sample_weight(genins):
+    assert abs(
+        (cl.Chainladder().fit(genins).ultimate_ -
+         cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins,sample_weight=genins/genins)).ultimate_) /
+        genins.latest_diagonal).max() < 1e-2
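The new test_sample_weight leans on the fact that uniform sample weights rescale the GLM objective without moving its optimum, so genins/genins should reproduce the unweighted ultimate. A quick standalone check of that property with scikit-learn (not part of the PR; the toy data is made up):

    import numpy as np
    from sklearn.linear_model import TweedieRegressor

    # Fitting with unit weights should match the unweighted fit exactly.
    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 1.0]])
    y = np.array([1.0, 2.0, 3.0, 4.0])
    m1 = TweedieRegressor(power=1, link='log').fit(X, y)
    m2 = TweedieRegressor(power=1, link='log').fit(X, y, sample_weight=np.ones(len(y)))
    assert np.allclose(m1.coef_, m2.coef_)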
19 changes: 19 additions & 0 deletions chainladder/development/tests/test_learning.py
@@ -0,0 +1,19 @@
+import chainladder as cl
+from sklearn.linear_model import LinearRegression
+from sklearn.pipeline import Pipeline
+from chainladder.utils.utility_functions import PatsyFormula
+
+def test_incremental(genins):
+    response = [genins.columns[0]]
+    model = cl.DevelopmentML(Pipeline(steps=[
+        ('design_matrix', PatsyFormula('C(development)')),
+        ('model', LinearRegression(fit_intercept=False))]),
+        y_ml=response,fit_incrementals=False).fit(genins)
+    assert abs(model.triangle_ml_.loc[:,:,'2010',:] - genins.mean()).max() < 1e2
+
+def test_misc(genins):
+    model = cl.DevelopmentML(Pipeline(steps=[
+        ('design_matrix', PatsyFormula('C(development)')),
+        ('model', LinearRegression(fit_intercept=False))]),
+        weighted_step = ['model'], fit_incrementals=False).fit(genins, sample_weight=genins/genins)
+    assert abs(model.triangle_ml_.loc[:,:,'2010',:] - genins.mean()).max() < 1e2