-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimize_lambda.py
More file actions
83 lines (69 loc) · 2.93 KB
/
optimize_lambda.py
File metadata and controls
83 lines (69 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
import lr
import config
class CustomCrossValidator:
    """
    Based on https://alex.miller.im/posts/linear-model-custom-loss-function-regularization-python/.
    Cross validates lr.CustomLogisticRegression over a list of lambdas,
    scoring each lambda by its mean holdout log loss (cross-entropy, as
    computed by sklearn.metrics.log_loss).
    """

    def __init__(self, X, Y):
        """
        X: feature data; must expose .to_numpy() and convert to a 2-D array
           (presumably a pandas DataFrame -- confirm against callers)
        Y: target data; must expose .to_numpy()
           (presumably a pandas Series -- confirm against callers)
        """
        self.X = X
        self.Y = Y

    def cross_validate(self, lambdas, num_folds=config.number_of_lambda_folds):
        """
        Pick the lambda with the lowest mean holdout log loss.

        lambdas: regularization parameters to try; must be iterable and
                 indexable by integer position (list, tuple, array), since
                 the winner is looked up as lambdas[argmin]
        num_folds: number of folds to cross-validate against

        Returns the selected lambda. Also stores self.lambdas,
        self.cv_scores (one mean score per lambda, in iteration order),
        self.lambda_star_index and self.lambda_star.

        Raises ValueError if lambdas is empty.
        """
        self.lambdas = lambdas
        self.cv_scores = []
        X = self.X.to_numpy()
        Y = self.Y.to_numpy()

        # The splitter does not depend on lambda, so build it once. With
        # shuffle=True and an integer random_state every kf.split(X) call
        # yields the same folds, so all lambdas are scored on identical
        # train/holdout partitions.
        kf = KFold(n_splits=num_folds, shuffle=True, random_state=0)

        for lam in self.lambdas:
            print("Lambda: {}".format(lam))
            # Error associated with each lambda is the average
            # of the errors across the k folds
            lambda_scores = self._mean_holdout_loss(X, Y, kf, lam)
            print("LAMBDA AVERAGE: {}".format(lambda_scores))
            self.cv_scores.append(lambda_scores)

        # np.argmin on an empty list also raises ValueError, but with an
        # opaque message; fail with an explicit one instead.
        if not self.cv_scores:
            raise ValueError("lambdas must contain at least one value")

        # Optimal lambda is that which minimizes the cross-validation error
        self.lambda_star_index = np.argmin(self.cv_scores)
        self.lambda_star = self.lambdas[self.lambda_star_index]
        print("\n\n**OPTIMAL LAMBDA: {}**".format(self.lambda_star))
        return self.lambda_star

    def _mean_holdout_loss(self, X, Y, kf, lam):
        """
        Fit one model per fold with regularization lam and return the
        mean of the per-fold holdout log losses.

        X, Y: numpy arrays of features (2-D) and targets
        kf: splitter whose split(X) yields (train_index, test_index) pairs
        lam: regularization parameter passed to the model as _lambda
        """
        # Keep track of the error for each holdout fold
        k_fold_scores = []
        # Iterate over folds, using k-1 folds for training
        # and the k-th fold for validation (fold numbers are 1-based)
        for fold_number, (train_index, test_index) in enumerate(kf.split(X), start=1):
            # Training data
            CV_X = X[train_index, :]
            CV_Y = Y[train_index]
            # Holdout data
            holdout_X = X[test_index, :]
            holdout_Y = Y[test_index]

            # Fit model to training sample. Only _lambda varies; A and
            # weights_init are None and alpha/beta/gamma are 0 (their
            # meaning is defined by lr.CustomLogisticRegression).
            lambda_fold_model = lr.CustomLogisticRegression(
                X=CV_X,
                Y=CV_Y,
                A=None,
                weights_init=None,
                alpha=0,
                beta=0,
                gamma=0,
                _lambda=lam,
            )
            lambda_fold_model.fit()

            # Calculate holdout error.
            # NOTE(review): log_loss infers the label set from holdout_Y;
            # a fold whose holdout contains a single class is expected to
            # raise -- confirm whether labels= should be passed.
            fold_probs = lambda_fold_model.predict_prob(holdout_X)
            fold_cross_entropy = log_loss(holdout_Y, fold_probs)
            k_fold_scores.append(fold_cross_entropy)
            print("Fold: {}. Error: {}".format(fold_number, fold_cross_entropy))

        return np.mean(k_fold_scores)