⭐ 1. Introduction & Overview¶

Your Goal: Predict whether a client will subscribe to a bank term deposit (the binary target column y).

🔹 2. Import Libraries & Set Up¶

In [32]:
# =============================================================================
# MACHINE LEARNING LIBRARIES - GENERAL-PURPOSE IMPORT TEMPLATE
# (only a subset of these libraries is used in this notebook)
# =============================================================================

# Set environment variable for scipy array API support
import os
os.environ['SCIPY_ARRAY_API'] = '1'

# Core Data Science Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Gradient Boosting Libraries
import xgboost as xgb
import lightgbm as lgb

# Deep Learning
#import torch
#import torch.nn as nn
#import torch.optim as optim
#import torchvision.transforms as transforms

import tensorflow as tf
from tensorflow import keras

# Visualization
import plotly.express as px
import plotly.graph_objects as go
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool

# Computer Vision
import cv2

# Scientific Computing & Statistics
import scipy.stats as stats
from scipy import optimize
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose

# Image Processing
from PIL import Image, ImageDraw, ImageFont

# Sampling and resampling - try the import; fall back to class_weight='balanced' if unavailable
try:
    from imblearn.over_sampling import SMOTE
    from imblearn.under_sampling import RandomUnderSampler
    IMBLEARN_AVAILABLE = True
except ImportError:
    print("imblearn not available, using sklearn class_weight='balanced' instead")
    IMBLEARN_AVAILABLE = False

# Utilities
import sys
import warnings
import datetime
from pathlib import Path
import pickle
import json

# Configuration
plt.rcParams['figure.figsize'] = (10, 6)
sns.set_palette("husl")
warnings.filterwarnings('ignore')
SEED = 42
In [33]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
bank = pd.read_csv('bank-full.csv', delimiter=';')
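A quick sanity check on shapes and class balance helps confirm the three files loaded as expected (a minimal sketch; it only assumes the frames created above):

print(train.shape, test.shape, bank.shape)
print(train['y'].value_counts(normalize=True))   # competition target is already 0/1
print(bank['y'].value_counts(normalize=True))    # original UCI target is 'yes'/'no'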
In [34]:
train.head()
Out[34]:
id age job marital education default balance housing loan contact day month duration campaign pdays previous poutcome y
0 0 42 technician married secondary no 7 no no cellular 25 aug 117 3 -1 0 unknown 0
1 1 38 blue-collar married secondary no 514 no no unknown 18 jun 185 1 -1 0 unknown 0
2 2 36 blue-collar married secondary no 602 yes no unknown 14 may 111 2 -1 0 unknown 0
3 3 27 student single secondary no 34 yes no unknown 28 may 10 2 -1 0 unknown 0
4 4 26 technician married secondary no 889 yes no cellular 3 feb 902 1 -1 0 unknown 1
In [35]:
# Enhanced feature engineering for bank marketing (non-redundant features)

def create_features(df):
    df = df.copy()

    # many_no: ordinal score for the "no default / no housing loan / no personal loan"
    # risk-aversion pattern (21 = all three 'no', 7 = exactly two, 3 = exactly one, 0 = none)
    def many_no(x):
        n = sum([x['default'] == 'no', x['housing'] == 'no', x['loan'] == 'no'])
        if n == 3:
            return 21
        if n == 2:
            return 7
        if n == 1:
            return 3
        return 0

    df['many_no'] = df.apply(many_no, axis=1)

    # Interaction features
    df['balance_duration'] = df['balance'] * df['duration']      # financial capacity × engagement
    df['campaign_previous'] = df['campaign'] * df['previous']    # current effort × past history
    df['age_balance'] = df['age'] * df['balance']                # life stage × wealth

    # Contact-history features
    df['contact_success_ratio'] = df['previous'] / (df['campaign'] + 1)
    df['days_since_contact'] = np.where(df['pdays'] == -1, 999, df['pdays'])  # -1 means never contacted

    # Binned features
    df['age_group'] = pd.cut(df['age'], bins=[0, 25, 35, 50, 65, 100],
                             labels=[0, 1, 2, 3, 4]).astype(int)
    df['balance_category'] = pd.cut(df['balance'], bins=[-np.inf, 0, 1000, 5000, np.inf],
                                    labels=[0, 1, 2, 3]).astype(int)

    return df

bank = create_features(bank)
train = create_features(train)
test = create_features(test)
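One way to check that the engineered many_no score behaves as intended is to look at the subscription rate per score level (a sketch; it only uses the train frame created above):

print(train.groupby('many_no')['y'].agg(['mean', 'size']))   # subscription rate and row count per score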
In [36]:
# Prepare feature matrix (X) and target vector (y) for training
X = train.drop(["y", "id"], axis=1)
y = train["y"]

# Prepare feature matrix (X_bank) and target vector (y_bank) from the original
# bank-full data; these rows are appended to each training fold below
X_bank = bank.drop(["y"], axis=1)
y_bank = bank["y"].map({'yes': 1, 'no': 0})

# Prepare feature matrix (X_test) for testing
X_test = test.drop(["id"], axis=1)
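Because each training fold is later concatenated with the bank rows, the three matrices must share the same columns in the same order; a cheap assertion catches feature-engineering drift early (a minimal sketch):

assert list(X.columns) == list(X_bank.columns) == list(X_test.columns), "Feature columns are misaligned"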
In [37]:
# Label-encode the categorical columns. Each encoder is fit on the union of categories
# from the competition train/test data and the original bank data, so transform()
# cannot hit an unseen category.
from sklearn.preprocessing import LabelEncoder

object_cols = X.select_dtypes(include="object").columns

for col_name in object_cols:
    le = LabelEncoder()
    le.fit(pd.concat([X[col_name], X_test[col_name], X_bank[col_name]]))
    X[col_name] = le.transform(X[col_name])
    X_test[col_name] = le.transform(X_test[col_name])
    X_bank[col_name] = le.transform(X_bank[col_name])
In [38]:
train.head(3)
Out[38]:
id age job marital education default balance housing loan contact ... poutcome y many_no balance_duration campaign_previous age_balance contact_success_ratio days_since_contact age_group balance_category
0 0 42 technician married secondary no 7 no no cellular ... unknown 0 21 819 0 294 0.0 999 2 1
1 1 38 blue-collar married secondary no 514 no no unknown ... unknown 0 21 95090 0 19532 0.0 999 2 1
2 2 36 blue-collar married secondary no 602 yes no unknown ... unknown 0 7 66822 0 21672 0.0 999 2 1

3 rows × 26 columns

In [39]:
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold

# Use 10-fold stratified cross-validation
n_splits = 10
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
y_probs = np.zeros(len(X_test))
models = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f"Training fold {fold + 1}/{n_splits} >>>")
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    X_train = pd.concat([X_train, X_bank])
    y_train = pd.concat([y_train, y_bank])
    
    model = lgb.LGBMClassifier(
        n_estimators=20000,
        learning_rate=0.06,
        num_leaves=100,
        max_depth=10,
        min_child_samples=9,
        subsample=0.8,
        colsample_bytree=0.5,
        reg_alpha=0.78,
        reg_lambda=3.0,
        max_bin=4523,
        random_state=42,
        verbosity=-1
    )
    
    model.fit(
        X_train, 
        y_train, 
        eval_set=[(X_val, y_val)], 
        callbacks=[
            lgb.early_stopping(100),
            lgb.log_evaluation(period=500)
        ]
    )

    models.append(model)
    
    # Average predictions across all folds
    y_probs += model.predict_proba(X_test)[:, 1] / n_splits
Training fold 1/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.134942
[1000]	valid_0's binary_logloss: 0.131812
[1500]	valid_0's binary_logloss: 0.130558
[2000]	valid_0's binary_logloss: 0.12992
[2500]	valid_0's binary_logloss: 0.129507
Early stopping, best iteration is:
[2605]	valid_0's binary_logloss: 0.129407
Training fold 2/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.137124
[1000]	valid_0's binary_logloss: 0.133799
[1500]	valid_0's binary_logloss: 0.132358
[2000]	valid_0's binary_logloss: 0.131729
[2500]	valid_0's binary_logloss: 0.13127
Early stopping, best iteration is:
[2558]	valid_0's binary_logloss: 0.131218
Training fold 3/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.138993
[1000]	valid_0's binary_logloss: 0.135548
[1500]	valid_0's binary_logloss: 0.134042
[2000]	valid_0's binary_logloss: 0.133397
Early stopping, best iteration is:
[2179]	valid_0's binary_logloss: 0.133221
Training fold 4/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.13742
[1000]	valid_0's binary_logloss: 0.133993
[1500]	valid_0's binary_logloss: 0.132662
[2000]	valid_0's binary_logloss: 0.132007
[2500]	valid_0's binary_logloss: 0.131704
Early stopping, best iteration is:
[2619]	valid_0's binary_logloss: 0.131637
Training fold 5/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.139034
[1000]	valid_0's binary_logloss: 0.135711
[1500]	valid_0's binary_logloss: 0.134509
[2000]	valid_0's binary_logloss: 0.134
Early stopping, best iteration is:
[2334]	valid_0's binary_logloss: 0.133784
Training fold 6/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.138064
[1000]	valid_0's binary_logloss: 0.134963
[1500]	valid_0's binary_logloss: 0.133569
Early stopping, best iteration is:
[1754]	valid_0's binary_logloss: 0.133191
Training fold 7/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.135646
[1000]	valid_0's binary_logloss: 0.132563
[1500]	valid_0's binary_logloss: 0.130898
[2000]	valid_0's binary_logloss: 0.130101
Early stopping, best iteration is:
[2313]	valid_0's binary_logloss: 0.129904
Training fold 8/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.137441
[1000]	valid_0's binary_logloss: 0.134486
[1500]	valid_0's binary_logloss: 0.133291
[2000]	valid_0's binary_logloss: 0.132774
[2500]	valid_0's binary_logloss: 0.132539
Early stopping, best iteration is:
[2412]	valid_0's binary_logloss: 0.13253
Training fold 9/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.137212
[1000]	valid_0's binary_logloss: 0.1339
[1500]	valid_0's binary_logloss: 0.132606
[2000]	valid_0's binary_logloss: 0.132026
Early stopping, best iteration is:
[2337]	valid_0's binary_logloss: 0.131833
Training fold 10/10 >>>
Training until validation scores don't improve for 100 rounds
[500]	valid_0's binary_logloss: 0.13835
[1000]	valid_0's binary_logloss: 0.134681
[1500]	valid_0's binary_logloss: 0.133202
[2000]	valid_0's binary_logloss: 0.132491
[2500]	valid_0's binary_logloss: 0.132217
Early stopping, best iteration is:
[2804]	valid_0's binary_logloss: 0.132031
Run comparison (fold 10 best iteration):
  1. [2876] valid_0's binary_logloss: 0.131302
  2. [2804] valid_0's binary_logloss: 0.132031
In [40]:
from sklearn.metrics import roc_auc_score

# Note: this scores the last fold's model on the full training set, so it is an
# in-sample (optimistic) sanity check rather than a cross-validated AUC.
best_auc = roc_auc_score(y, model.predict_proba(X)[:, 1])
print(f"Best AUC: {best_auc:.4f}")
Best AUC: 0.9892
Run comparison:
  1. pre-set features, best AUC: 0.9879
  2. new features, best AUC: 0.9892
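The AUC above is computed in-sample; a less optimistic estimate scores each fold's model only on its held-out rows. A minimal sketch reusing the StratifiedKFold above (its fixed random_state makes the splits reproducible):

oof_lgb = np.zeros(len(X))
for fold, (tr_idx, va_idx) in enumerate(kf.split(X, y)):
    # models[fold] never saw these validation rows during training
    oof_lgb[va_idx] = models[fold].predict_proba(X.iloc[va_idx])[:, 1]
print(f"LightGBM out-of-fold AUC: {roc_auc_score(y, oof_lgb):.4f}")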
In [41]:
output = pd.DataFrame({
    'id': test.id,
    'y': y_probs
})

output.to_csv('attempt-lightgbm7.csv', index=False)
print("Your submission was successfully saved!")
Your submission was successfully saved!

Ensemble of LightGBM, XGBoost, and CatBoost

In [42]:
import optuna
import catboost as cb
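Optuna prints one INFO line per trial (visible in the long logs below); if that gets noisy, its standard logging helper can silence it. Optional, not what was run here:

# optuna.logging.set_verbosity(optuna.logging.WARNING)   # keep only warnings and errors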
In [43]:
# XGBoost Hyperparameter Tuning - Complementary to LightGBM/CatBoost
def objective_xgboost(trial):
    # Parameter space for robust regularization (different approach than LightGBM)
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 2000, 5000),
        'learning_rate': trial.suggest_float('learning_rate', 0.03, 0.08),
        'max_depth': trial.suggest_int('max_depth', 6, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.9),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.1, 2.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1.0, 5.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'random_state': 42,
        'eval_metric': 'logloss',
        'early_stopping_rounds': 100,
        'verbosity': 0
    }

    # 5-fold CV for faster tuning
    cv_scores = []
    kf_tune = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for train_idx, val_idx in kf_tune.split(X, y):
        X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
        X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

        # Add bank data
        X_train_fold = pd.concat([X_train_fold, X_bank])
        y_train_fold = pd.concat([y_train_fold, y_bank])

        model = xgb.XGBClassifier(**params)
        # Early stopping monitors the same fold that is scored below,
        # which makes this CV estimate slightly optimistic.
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)], verbose=False)

        pred = model.predict_proba(X_val_fold)[:, 1]
        score = roc_auc_score(y_val_fold, pred)
        cv_scores.append(score)

    return np.mean(cv_scores)

print("Tuning XGBoost parameters...")
study_xgboost = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))       
study_xgboost.optimize(objective_xgboost, n_trials=50)

best_xgboost_params = study_xgboost.best_params
print(f"Best XGBoost CV AUC: {study_xgboost.best_value:.4f}")
print(f"Best XGBoost params: {best_xgboost_params}")
[I 2025-08-07 13:01:26,444] A new study created in memory with name: no-name-d418a1b1-a8db-4ae7-9f4d-ecdf4f262018
Tuning XGBoost parameters...
[I 2025-08-07 13:02:52,437] Trial 0 finished with value: 0.9681654297437244 and parameters: {'n_estimators': 3123, 'learning_rate': 0.0775357153204958, 'max_depth': 9, 'min_child_weight': 5, 'subsample': 0.7312037280884873, 'colsample_bytree': 0.7311989040672405, 'reg_alpha': 0.21035886311957896, 'reg_lambda': 4.46470458309974, 'gamma': 0.6011150117432088}. Best is trial 0 with value: 0.9681654297437244.
[I 2025-08-07 13:05:57,541] Trial 1 finished with value: 0.9685067764131802 and parameters: {'n_estimators': 4124, 'learning_rate': 0.03102922471479012, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.7424678221356552, 'colsample_bytree': 0.73636499344142, 'reg_alpha': 0.4484685687215243, 'reg_lambda': 2.216968971838151, 'gamma': 0.5247564316322378}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:08:19,918] Trial 2 finished with value: 0.9684309696188743 and parameters: {'n_estimators': 3296, 'learning_rate': 0.04456145700990209, 'max_depth': 9, 'min_child_weight': 1, 'subsample': 0.7584289297070436, 'colsample_bytree': 0.7732723686587383, 'reg_alpha': 0.9665329700123682, 'reg_lambda': 4.140703845572054, 'gamma': 0.19967378215835974}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:12:01,230] Trial 3 finished with value: 0.968241902166777 and parameters: {'n_estimators': 3543, 'learning_rate': 0.05962072844310212, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.7341048247374583, 'colsample_bytree': 0.7130103185970559, 'reg_alpha': 1.9028825207813331, 'reg_lambda': 4.862528132298237, 'gamma': 0.8083973481164611}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:15:14,937] Trial 4 finished with value: 0.968422817858561 and parameters: {'n_estimators': 2914, 'learning_rate': 0.03488360570031919, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.7244076469689558, 'colsample_bytree': 0.799035382022254, 'reg_alpha': 0.16533819011891496, 'reg_lambda': 4.637281608315128, 'gamma': 0.2587799816000169}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:17:59,319] Trial 5 finished with value: 0.9683243602374161 and parameters: {'n_estimators': 3988, 'learning_rate': 0.04558555380447055, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.7369708911051054, 'colsample_bytree': 0.8939169255529118, 'reg_alpha': 1.5727523643861177, 'reg_lambda': 4.757995766256757, 'gamma': 0.8948273504276488}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:20:30,376] Trial 6 finished with value: 0.9680618053962011 and parameters: {'n_estimators': 3794, 'learning_rate': 0.07609371175115584, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.7090454577821076, 'colsample_bytree': 0.7650660661526528, 'reg_alpha': 0.8384868504100158, 'reg_lambda': 2.0853961270955836, 'gamma': 0.8287375091519293}. Best is trial 1 with value: 0.9685067764131802.
[I 2025-08-07 13:23:34,919] Trial 7 finished with value: 0.9685565580431874 and parameters: {'n_estimators': 3070, 'learning_rate': 0.04404672548436904, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.8604393961508079, 'colsample_bytree': 0.7149101287359542, 'reg_alpha': 1.975085179540983, 'reg_lambda': 4.08897907718663, 'gamma': 0.1987156815341724}. Best is trial 7 with value: 0.9685565580431874.
[I 2025-08-07 13:25:08,307] Trial 8 finished with value: 0.9683656780752485 and parameters: {'n_estimators': 2016, 'learning_rate': 0.07077307142274171, 'max_depth': 9, 'min_child_weight': 6, 'subsample': 0.8542540693371892, 'colsample_bytree': 0.714808930346818, 'reg_alpha': 0.7810848842341179, 'reg_lambda': 1.4634762381005189, 'gamma': 0.8631034258755935}. Best is trial 7 with value: 0.9685565580431874.
[I 2025-08-07 13:29:24,095] Trial 9 finished with value: 0.9682804122687492 and parameters: {'n_estimators': 3870, 'learning_rate': 0.046544901242632455, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.7650366644053493, 'colsample_bytree': 0.8459212356676128, 'reg_alpha': 1.3113591955749049, 'reg_lambda': 4.548850970305306, 'gamma': 0.4722149251619493}. Best is trial 7 with value: 0.9685565580431874.
[I 2025-08-07 13:32:04,390] Trial 10 finished with value: 0.9683976536882083 and parameters: {'n_estimators': 4836, 'learning_rate': 0.060598596139102734, 'max_depth': 7, 'min_child_weight': 1, 'subsample': 0.8962141652178608, 'colsample_bytree': 0.8380494147138291, 'reg_alpha': 1.948771745462287, 'reg_lambda': 3.496334108689462, 'gamma': 0.0397996060770148}. Best is trial 7 with value: 0.9685565580431874.
[I 2025-08-07 13:35:16,822] Trial 11 finished with value: 0.9685690567785026 and parameters: {'n_estimators': 4712, 'learning_rate': 0.03125412937116301, 'max_depth': 10, 'min_child_weight': 7, 'subsample': 0.8125061405332253, 'colsample_bytree': 0.7486758045268418, 'reg_alpha': 0.5320224533652154, 'reg_lambda': 2.702958058813853, 'gamma': 0.46994321499695574}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:38:47,886] Trial 12 finished with value: 0.9685023742854725 and parameters: {'n_estimators': 4815, 'learning_rate': 0.03720456338607968, 'max_depth': 8, 'min_child_weight': 7, 'subsample': 0.821814772686749, 'colsample_bytree': 0.7572042288907423, 'reg_alpha': 1.2925215959863048, 'reg_lambda': 3.250426043779354, 'gamma': 0.345437871794682}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:41:24,331] Trial 13 finished with value: 0.9684950316469078 and parameters: {'n_estimators': 2550, 'learning_rate': 0.03937663157166836, 'max_depth': 10, 'min_child_weight': 7, 'subsample': 0.8154140805769239, 'colsample_bytree': 0.7051098989621086, 'reg_alpha': 0.5589543726626454, 'reg_lambda': 2.6614202484762868, 'gamma': 0.03778031229306575}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:44:31,507] Trial 14 finished with value: 0.9684448419012266 and parameters: {'n_estimators': 4334, 'learning_rate': 0.04982309189699997, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.8561338035887187, 'colsample_bytree': 0.7944478816823999, 'reg_alpha': 1.6625870988287481, 'reg_lambda': 3.7577508159850352, 'gamma': 0.38383114510937844}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:48:57,692] Trial 15 finished with value: 0.9684619660268263 and parameters: {'n_estimators': 2748, 'learning_rate': 0.03163468929187697, 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.7897096113780152, 'colsample_bytree': 0.7436181591751374, 'reg_alpha': 1.212992579215986, 'reg_lambda': 1.062041527976909, 'gamma': 0.6423620561394954}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:51:30,045] Trial 16 finished with value: 0.9684402711488886 and parameters: {'n_estimators': 4418, 'learning_rate': 0.053597616885897405, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.8527494789829706, 'colsample_bytree': 0.7031207411014159, 'reg_alpha': 0.4812031244022279, 'reg_lambda': 2.6542232604747626, 'gamma': 0.16527177610975427}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:53:53,403] Trial 17 finished with value: 0.968561409664099 and parameters: {'n_estimators': 2293, 'learning_rate': 0.041117317845056386, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.8871933728473397, 'colsample_bytree': 0.8285694861181111, 'reg_alpha': 1.525765108083813, 'reg_lambda': 3.941375147628981, 'gamma': 0.6712369673505519}. Best is trial 11 with value: 0.9685690567785026.
[I 2025-08-07 13:56:19,464] Trial 18 finished with value: 0.9685970618366178 and parameters: {'n_estimators': 2054, 'learning_rate': 0.04025387737768451, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.8813483474949517, 'colsample_bytree': 0.8312087668843073, 'reg_alpha': 1.5084255565406828, 'reg_lambda': 2.9313945858455686, 'gamma': 0.6972987930890137}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 13:59:33,184] Trial 19 finished with value: 0.9685373914693688 and parameters: {'n_estimators': 2011, 'learning_rate': 0.03048632991768497, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.825752469459229, 'colsample_bytree': 0.8698062277490901, 'reg_alpha': 1.1308969090374692, 'reg_lambda': 2.8146421430011497, 'gamma': 0.7157675415766318}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:02:03,942] Trial 20 finished with value: 0.968445555276028 and parameters: {'n_estimators': 4617, 'learning_rate': 0.035413075471851865, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.7918855630714174, 'colsample_bytree': 0.817744961921466, 'reg_alpha': 0.706963858463172, 'reg_lambda': 2.1215665409160955, 'gamma': 0.9938427850497089}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:04:32,957] Trial 21 finished with value: 0.9685482132720742 and parameters: {'n_estimators': 2418, 'learning_rate': 0.04032891518735913, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.8985088151036531, 'colsample_bytree': 0.8261442043181213, 'reg_alpha': 1.520283912164619, 'reg_lambda': 3.1558290948988725, 'gamma': 0.6990242621704733}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:07:24,146] Trial 22 finished with value: 0.9685705202072713 and parameters: {'n_estimators': 2299, 'learning_rate': 0.040462254876629716, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.880164288279437, 'colsample_bytree': 0.7843544449087234, 'reg_alpha': 1.7404268484824685, 'reg_lambda': 3.701769326617604, 'gamma': 0.5488163533512334}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:10:39,757] Trial 23 finished with value: 0.9685967351693886 and parameters: {'n_estimators': 2232, 'learning_rate': 0.03541637056222954, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.8760052469445377, 'colsample_bytree': 0.7837467057556148, 'reg_alpha': 1.7551103922407045, 'reg_lambda': 3.2730717745677778, 'gamma': 0.5180316543892582}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:13:01,762] Trial 24 finished with value: 0.9685375946923912 and parameters: {'n_estimators': 2278, 'learning_rate': 0.05069931042013509, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.8756670794814093, 'colsample_bytree': 0.7824505524025784, 'reg_alpha': 1.7656927235231779, 'reg_lambda': 3.5048444092558335, 'gamma': 0.5550196256555672}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:14:59,782] Trial 25 finished with value: 0.9684086434707917 and parameters: {'n_estimators': 2686, 'learning_rate': 0.059634969996231725, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.876294883888365, 'colsample_bytree': 0.8593406338373164, 'reg_alpha': 1.3942794805927359, 'reg_lambda': 3.5155657480075657, 'gamma': 0.4013971858561438}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:17:56,493] Trial 26 finished with value: 0.9685319947672966 and parameters: {'n_estimators': 2229, 'learning_rate': 0.03637106055995824, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.8380521430564344, 'colsample_bytree': 0.8115035303145616, 'reg_alpha': 1.7037467588646507, 'reg_lambda': 3.0205556555689954, 'gamma': 0.7575431762403109}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:21:36,710] Trial 27 finished with value: 0.9685797020584769 and parameters: {'n_estimators': 2547, 'learning_rate': 0.041770924261068314, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.8759480939746173, 'colsample_bytree': 0.7858858120192809, 'reg_alpha': 1.8141895329186388, 'reg_lambda': 3.752406525736708, 'gamma': 0.5982343179060122}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:24:20,756] Trial 28 finished with value: 0.9684419594015342 and parameters: {'n_estimators': 2564, 'learning_rate': 0.04815612813384653, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.8385590482386824, 'colsample_bytree': 0.8127559344204559, 'reg_alpha': 1.8307260283340514, 'reg_lambda': 2.4091047612786354, 'gamma': 0.6079220082693564}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:27:00,771] Trial 29 finished with value: 0.9684586847392644 and parameters: {'n_estimators': 2902, 'learning_rate': 0.05420177949435838, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.8695450968301509, 'colsample_bytree': 0.794336170393672, 'reg_alpha': 1.4682318043922964, 'reg_lambda': 4.359353824726454, 'gamma': 0.6002411647513862}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:28:50,267] Trial 30 finished with value: 0.9682874696461801 and parameters: {'n_estimators': 3310, 'learning_rate': 0.06585527130069006, 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.8405941026091306, 'colsample_bytree': 0.872161454205605, 'reg_alpha': 1.5916724619154088, 'reg_lambda': 1.8136773522298422, 'gamma': 0.7530878527989517}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:32:29,605] Trial 31 finished with value: 0.9685899528509596 and parameters: {'n_estimators': 2146, 'learning_rate': 0.04129994985805446, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.8850986758981175, 'colsample_bytree': 0.7880362305909715, 'reg_alpha': 1.8087787931058803, 'reg_lambda': 3.3102680420664576, 'gamma': 0.5719406095472662}. Best is trial 18 with value: 0.9685970618366178.
[I 2025-08-07 14:36:42,614] Trial 32 finished with value: 0.9686084599363983 and parameters: {'n_estimators': 2084, 'learning_rate': 0.04241228443916973, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8894733238422363, 'colsample_bytree': 0.7755272104546164, 'reg_alpha': 1.8455318772526679, 'reg_lambda': 3.297335548087981, 'gamma': 0.48321415961320585}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 14:40:34,707] Trial 33 finished with value: 0.968551640078298 and parameters: {'n_estimators': 2119, 'learning_rate': 0.04338840570520476, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8892626092582322, 'colsample_bytree': 0.7674550416274887, 'reg_alpha': 1.8877025633427411, 'reg_lambda': 3.232533496601635, 'gamma': 0.4904619715408989}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 14:44:38,604] Trial 34 finished with value: 0.9685770643353221 and parameters: {'n_estimators': 2139, 'learning_rate': 0.038241088360278556, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.8648283507568529, 'colsample_bytree': 0.774651833441289, 'reg_alpha': 1.6539527315495488, 'reg_lambda': 2.439312360251772, 'gamma': 0.4168883638319408}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 14:48:27,799] Trial 35 finished with value: 0.9685951346600735 and parameters: {'n_estimators': 2422, 'learning_rate': 0.03347798379949189, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8882143847643476, 'colsample_bytree': 0.7286400425952747, 'reg_alpha': 1.3932743053643368, 'reg_lambda': 2.9085352944240426, 'gamma': 0.2745171391685712}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 14:54:33,093] Trial 36 finished with value: 0.9684932996227612 and parameters: {'n_estimators': 2383, 'learning_rate': 0.03357339868321989, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.8990308670613424, 'colsample_bytree': 0.7281870439848215, 'reg_alpha': 1.426841843545473, 'reg_lambda': 2.9571353188970173, 'gamma': 0.29402528460586036}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 14:59:21,289] Trial 37 finished with value: 0.9685637807325058 and parameters: {'n_estimators': 2763, 'learning_rate': 0.03422300656050513, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.8470292552950044, 'colsample_bytree': 0.7271902732118719, 'reg_alpha': 1.0168717021941491, 'reg_lambda': 2.42473824681267, 'gamma': 0.14387613651266906}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:03:57,400] Trial 38 finished with value: 0.9685204983598441 and parameters: {'n_estimators': 2460, 'learning_rate': 0.03332480855354848, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8689805955783403, 'colsample_bytree': 0.7552082826946688, 'reg_alpha': 0.3038203991932765, 'reg_lambda': 2.977853990759783, 'gamma': 0.2934095366949327}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:09:15,931] Trial 39 finished with value: 0.968476502910395 and parameters: {'n_estimators': 3535, 'learning_rate': 0.03768389782907463, 'max_depth': 7, 'min_child_weight': 5, 'subsample': 0.8870508385440137, 'colsample_bytree': 0.806852055005723, 'reg_alpha': 1.1240542693372433, 'reg_lambda': 4.247064792967203, 'gamma': 0.4431761270391086}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:12:54,845] Trial 40 finished with value: 0.9685076014200591 and parameters: {'n_estimators': 3114, 'learning_rate': 0.043522297347761606, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.867150342933512, 'colsample_bytree': 0.7355065436663255, 'reg_alpha': 1.3462940653444355, 'reg_lambda': 3.3886384477538236, 'gamma': 0.33644244669160894}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:16:17,622] Trial 41 finished with value: 0.9685460451896585 and parameters: {'n_estimators': 2152, 'learning_rate': 0.04762410330034662, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.8866180955645535, 'colsample_bytree': 0.7741620346345551, 'reg_alpha': 1.8695575045928476, 'reg_lambda': 3.2620665862348606, 'gamma': 0.5329880533144051}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:19:59,455] Trial 42 finished with value: 0.9685735224213274 and parameters: {'n_estimators': 2067, 'learning_rate': 0.036007975914862204, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.8837510159741416, 'colsample_bytree': 0.8018471357933656, 'reg_alpha': 1.5986751297301085, 'reg_lambda': 2.8498277449693017, 'gamma': 0.1035947169887228}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:23:22,124] Trial 43 finished with value: 0.9684125545367672 and parameters: {'n_estimators': 2003, 'learning_rate': 0.04292813206599627, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.7576374368586407, 'colsample_bytree': 0.8482233882320287, 'reg_alpha': 1.9891736607326924, 'reg_lambda': 3.118779879780776, 'gamma': 0.5115330697360837}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:27:25,906] Trial 44 finished with value: 0.9685951028089347 and parameters: {'n_estimators': 2212, 'learning_rate': 0.03869431968241978, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.8942471672342232, 'colsample_bytree': 0.823759548165568, 'reg_alpha': 1.7604837999849499, 'reg_lambda': 3.9450024171593716, 'gamma': 0.2310154219068074}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:31:57,362] Trial 45 finished with value: 0.9684731902304702 and parameters: {'n_estimators': 2966, 'learning_rate': 0.045751892253505955, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.8930602579182858, 'colsample_bytree': 0.8239655586433066, 'reg_alpha': 1.673608109766161, 'reg_lambda': 3.9842411922162886, 'gamma': 0.2374887759598658}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:36:09,316] Trial 46 finished with value: 0.9685340943544037 and parameters: {'n_estimators': 2643, 'learning_rate': 0.03856905320270151, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8992963809039051, 'colsample_bytree': 0.8362502434563537, 'reg_alpha': 1.2437210485609844, 'reg_lambda': 4.692988148459355, 'gamma': 0.2444166487449254}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:41:52,534] Trial 47 finished with value: 0.9684550945005057 and parameters: {'n_estimators': 3330, 'learning_rate': 0.032663322034005654, 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.8585221498006452, 'colsample_bytree': 0.8959247074152795, 'reg_alpha': 1.4862112189301173, 'reg_lambda': 3.7302889554221603, 'gamma': 0.8088903146556538}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:46:16,263] Trial 48 finished with value: 0.9685140387775778 and parameters: {'n_estimators': 2454, 'learning_rate': 0.03517406326288233, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.8755600448653386, 'colsample_bytree': 0.8457341281507705, 'reg_alpha': 1.9289213274655754, 'reg_lambda': 1.9455145231215816, 'gamma': 0.09870018198996322}. Best is trial 32 with value: 0.9686084599363983.
[I 2025-08-07 15:50:39,129] Trial 49 finished with value: 0.9680490165066125 and parameters: {'n_estimators': 2336, 'learning_rate': 0.030639974765641696, 'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.8485940876094181, 'colsample_bytree': 0.7586569969544734, 'reg_alpha': 0.914079879556502, 'reg_lambda': 2.574204435311884, 'gamma': 0.3617514035297955}. Best is trial 32 with value: 0.9686084599363983.
Best XGBoost CV AUC: 0.9686
Best XGBoost params: {'n_estimators': 2084, 'learning_rate': 0.04241228443916973, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.8894733238422363, 'colsample_bytree': 0.7755272104546164, 'reg_alpha': 1.8455318772526679, 'reg_lambda': 3.297335548087981, 'gamma': 0.48321415961320585}
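Since each study takes hours, it is worth persisting the tuned parameters so they can be reloaded without re-running Optuna (a minimal sketch; the filename is an assumption, and json is already imported in the setup cell):

with open('best_xgboost_params.json', 'w') as f:
    json.dump(best_xgboost_params, f, indent=2)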
In [ ]:
# CatBoost Hyperparameter Tuning - Optimized for diversity from LightGBM
def objective_catboost(trial):
    # Parameter space designed to be different from LightGBM
    params = {
        'iterations': trial.suggest_int('iterations', 2000, 5000),
        'learning_rate': trial.suggest_float('learning_rate', 0.03, 0.08),
        'depth': trial.suggest_int('depth', 6, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        'border_count': trial.suggest_int('border_count', 128, 255),
        'random_strength': trial.suggest_float('random_strength', 0.5, 2.0),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_state': 42,
        'verbose': False,
        'early_stopping_rounds': 100
    }

    # 5-fold CV for faster tuning
    cv_scores = []
    kf_tune = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for train_idx, val_idx in kf_tune.split(X, y):
        X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
        X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

        # Add bank data
        X_train_fold = pd.concat([X_train_fold, X_bank])
        y_train_fold = pd.concat([y_train_fold, y_bank])

        model = cb.CatBoostClassifier(**params)
        model.fit(X_train_fold, y_train_fold, eval_set=(X_val_fold, y_val_fold), verbose=False)

        pred = model.predict_proba(X_val_fold)[:, 1]
        score = roc_auc_score(y_val_fold, pred)
        cv_scores.append(score)

    return np.mean(cv_scores)

print("Tuning CatBoost parameters...")
study_catboost = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))      
study_catboost.optimize(objective_catboost, n_trials=50)

best_catboost_params = study_catboost.best_params
print(f"Best CatBoost CV AUC: {study_catboost.best_value:.4f}")
print(f"Best CatBoost params: {best_catboost_params}")
[I 2025-08-07 15:50:39,142] A new study created in memory with name: no-name-31037c8e-fb7c-4f8c-86cc-f3a82f5285aa
Tuning CatBoost parameters...
[I 2025-08-07 15:59:19,874] Trial 0 finished with value: 0.9667201844179077 and parameters: {'iterations': 3123, 'learning_rate': 0.0775357153204958, 'depth': 9, 'l2_leaf_reg': 6.387926357773329, 'border_count': 147, 'random_strength': 0.7339917805043039, 'bagging_temperature': 0.05808361216819946}. Best is trial 0 with value: 0.9667201844179077.
[I 2025-08-07 16:10:40,762] Trial 1 finished with value: 0.9674417394062009 and parameters: {'iterations': 4599, 'learning_rate': 0.060055750587160436, 'depth': 9, 'l2_leaf_reg': 1.185260448662222, 'border_count': 252, 'random_strength': 1.7486639612006325, 'bagging_temperature': 0.21233911067827616}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 16:20:45,734] Trial 2 finished with value: 0.9664948735192249 and parameters: {'iterations': 2545, 'learning_rate': 0.03917022549267169, 'depth': 7, 'l2_leaf_reg': 5.72280788469014, 'border_count': 183, 'random_strength': 0.9368437102970628, 'bagging_temperature': 0.6118528947223795}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 16:30:03,526] Trial 3 finished with value: 0.9668437071626259 and parameters: {'iterations': 2418, 'learning_rate': 0.04460723242676091, 'depth': 7, 'l2_leaf_reg': 5.104629857953324, 'border_count': 228, 'random_strength': 0.7995106732375397, 'bagging_temperature': 0.5142344384136116}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 16:47:14,869] Trial 4 finished with value: 0.9666843667800308 and parameters: {'iterations': 3777, 'learning_rate': 0.032322520635999885, 'depth': 9, 'l2_leaf_reg': 2.5347171131856236, 'border_count': 136, 'random_strength': 1.92332830588, 'bagging_temperature': 0.9656320330745594}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 17:03:07,524] Trial 5 finished with value: 0.9669287849486056 and parameters: {'iterations': 4426, 'learning_rate': 0.04523068845866853, 'depth': 6, 'l2_leaf_reg': 7.158097238609412, 'border_count': 184, 'random_strength': 0.6830573522671682, 'bagging_temperature': 0.4951769101112702}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 17:11:19,099] Trial 6 finished with value: 0.9668431460298846 and parameters: {'iterations': 2103, 'learning_rate': 0.0754660201039391, 'depth': 7, 'l2_leaf_reg': 6.962700559185838, 'border_count': 167, 'random_strength': 1.2801020317667162, 'bagging_temperature': 0.5467102793432796}. Best is trial 1 with value: 0.9674417394062009.
[I 2025-08-07 17:21:44,760] Trial 7 finished with value: 0.9675255790147533 and parameters: {'iterations': 2554, 'learning_rate': 0.07847923138822793, 'depth': 9, 'l2_leaf_reg': 9.455490474077703, 'border_count': 242, 'random_strength': 1.3968499682166278, 'bagging_temperature': 0.9218742350231168}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 17:29:52,473] Trial 8 finished with value: 0.9658251102966036 and parameters: {'iterations': 2265, 'learning_rate': 0.03979914312095726, 'depth': 6, 'l2_leaf_reg': 3.927972976869379, 'border_count': 177, 'random_strength': 0.9070235476608439, 'bagging_temperature': 0.8287375091519293}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 17:43:07,376] Trial 9 finished with value: 0.9674044399850678 and parameters: {'iterations': 3070, 'learning_rate': 0.04404672548436904, 'depth': 8, 'l2_leaf_reg': 2.2683180247728636, 'border_count': 230, 'random_strength': 0.6118259655196563, 'bagging_temperature': 0.9868869366005173}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 17:57:24,028] Trial 10 finished with value: 0.9671118153757383 and parameters: {'iterations': 3541, 'learning_rate': 0.06499375049607987, 'depth': 10, 'l2_leaf_reg': 9.59625943278804, 'border_count': 211, 'random_strength': 1.3628121002229092, 'bagging_temperature': 0.7303668952070097}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 18:14:59,081] Trial 11 finished with value: 0.9673944906871246 and parameters: {'iterations': 4929, 'learning_rate': 0.060101379931577215, 'depth': 10, 'l2_leaf_reg': 9.93149973182709, 'border_count': 249, 'random_strength': 1.6974075590594648, 'bagging_temperature': 0.1533681809513373}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 18:25:17,159] Trial 12 finished with value: 0.9673831300845113 and parameters: {'iterations': 4165, 'learning_rate': 0.0680821342737341, 'depth': 9, 'l2_leaf_reg': 1.1963612909754555, 'border_count': 249, 'random_strength': 1.5667091934615425, 'bagging_temperature': 0.2646596329637791}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 18:41:21,287] Trial 13 finished with value: 0.9674887704614047 and parameters: {'iterations': 4997, 'learning_rate': 0.05406449773780332, 'depth': 8, 'l2_leaf_reg': 8.316378003207923, 'border_count': 211, 'random_strength': 1.9883771855808856, 'bagging_temperature': 0.34279928826631634}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 18:54:12,018] Trial 14 finished with value: 0.967349560003362 and parameters: {'iterations': 2872, 'learning_rate': 0.05276040985150533, 'depth': 8, 'l2_leaf_reg': 8.461514548344311, 'border_count': 207, 'random_strength': 1.9886919905347793, 'bagging_temperature': 0.3276567589577757}. Best is trial 7 with value: 0.9675255790147533.
[I 2025-08-07 19:11:17,085] Trial 15 finished with value: 0.967578450157388 and parameters: {'iterations': 3934, 'learning_rate': 0.052825853293157414, 'depth': 8, 'l2_leaf_reg': 8.272594310024477, 'border_count': 225, 'random_strength': 1.1201211495390695, 'bagging_temperature': 0.3565126982022847}. Best is trial 15 with value: 0.967578450157388.
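With trials taking 10-17 minutes each, the CatBoost study can also be capped by wall-clock time instead of (or in addition to) a trial count; timeout is a standard argument of Study.optimize, and study.best_params remains available from whatever trials have completed. A sketch, not what was run above:

# study_catboost.optimize(objective_catboost, n_trials=50, timeout=4 * 3600)  # stop after ~4 hours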
In [ ]:
# Train the tuned CatBoost with the same 10-fold CV structure as the LightGBM run above
print("Training finetuned CatBoost...")
y_probs_cat_tuned = np.zeros(len(X_test))
catboost_models_tuned = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f"CatBoost fold {fold + 1}/{n_splits}")
    X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
    X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

    # Add bank data to the training fold, as in the LightGBM loop
    X_train_fold = pd.concat([X_train_fold, X_bank])
    y_train_fold = pd.concat([y_train_fold, y_bank])

    model = cb.CatBoostClassifier(**best_catboost_params)
    model.fit(
        X_train_fold,
        y_train_fold,
        eval_set=(X_val_fold, y_val_fold),
        verbose=500,
        use_best_model=True
    )

    catboost_models_tuned.append(model)
    y_probs_cat_tuned += model.predict_proba(X_test)[:, 1] / n_splits

print("CatBoost training completed!")
In [ ]:
# Train the tuned XGBoost with the same 10-fold CV structure as the LightGBM run above
print("Training finetuned XGBoost...")
y_probs_xgb_tuned = np.zeros(len(X_test))
xgboost_models_tuned = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f"XGBoost fold {fold + 1}/{n_splits}")
    X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
    X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

    # Add bank data to the training fold, as in the LightGBM loop
    X_train_fold = pd.concat([X_train_fold, X_bank])
    y_train_fold = pd.concat([y_train_fold, y_bank])

    model = xgb.XGBClassifier(**best_xgboost_params)
    model.fit(
        X_train_fold,
        y_train_fold,
        eval_set=[(X_val_fold, y_val_fold)],
        verbose=500
    )

    xgboost_models_tuned.append(model)
    y_probs_xgb_tuned += model.predict_proba(X_test)[:, 1] / n_splits

print("XGBoost training completed!")
In [ ]:
# Create ensemble from all 3 finetuned models
from scipy.optimize import minimize
from sklearn.metrics import roc_auc_score

print("=== CREATING OPTIMIZED 3-MODEL ENSEMBLE ===")

# Generate out-of-fold predictions for validation
def get_oof_predictions_simple(models_list, X_data, y_data, kf):
    """Generate out-of-fold predictions.

    Relies on kf being deterministic (StratifiedKFold with shuffle=True and a fixed
    random_state), so the splits here match the folds each model was trained on.
    """
    oof_preds = np.zeros(len(X_data))

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_data, y_data)):
        model = models_list[fold]
        oof_preds[val_idx] = model.predict_proba(X_data.iloc[val_idx])[:, 1]

    return oof_preds

# Get OOF predictions for ensemble optimization
lgb_oof = get_oof_predictions_simple(models, X, y, kf)
cat_oof = get_oof_predictions_simple(catboost_models_tuned, X, y, kf)
xgb_oof = get_oof_predictions_simple(xgboost_models_tuned, X, y, kf)

# Individual model scores
lgb_score = roc_auc_score(y, lgb_oof)
cat_score = roc_auc_score(y, cat_oof)
xgb_score = roc_auc_score(y, xgb_oof)

print(f"Individual Model CV Scores:")
print(f"LightGBM:     {lgb_score:.4f}")
print(f"CatBoost:     {cat_score:.4f}")
print(f"XGBoost:      {xgb_score:.4f}")
print()

# Optimize ensemble weights on the OOF predictions.
# Note: ROC AUC is piecewise constant in the weights, so a gradient-based optimizer
# such as L-BFGS-B may barely move from the starting point (a derivative-free
# fallback is sketched after this cell).
def ensemble_loss(weights, *args):
    lgb_pred, cat_pred, xgb_pred, y_true = args
    weights = weights / weights.sum()  # normalize so the weights sum to 1
    ensemble_pred = weights[0] * lgb_pred + weights[1] * cat_pred + weights[2] * xgb_pred
    return -roc_auc_score(y_true, ensemble_pred)  # negative because we minimize

result = minimize(
    ensemble_loss,
    x0=[1, 1, 1],  # initial equal weights
    args=(lgb_oof, cat_oof, xgb_oof, y),
    bounds=[(0.01, 5), (0.01, 5), (0.01, 5)],
    method='L-BFGS-B'
)

optimal_weights = result.x / result.x.sum()
print(f"Optimal weights:")
print(f"  LightGBM: {optimal_weights[0]:.3f}")
print(f"  CatBoost: {optimal_weights[1]:.3f}")
print(f"  XGBoost:  {optimal_weights[2]:.3f}")

# Create final ensemble predictions
ensemble_oof = (optimal_weights[0] * lgb_oof +
                optimal_weights[1] * cat_oof +
                optimal_weights[2] * xgb_oof)

ensemble_test = (optimal_weights[0] * y_probs +
                optimal_weights[1] * y_probs_cat_tuned +
                optimal_weights[2] * y_probs_xgb_tuned)

ensemble_score = roc_auc_score(y, ensemble_oof)
improvement = ensemble_score - lgb_score

print(f"\nEnsemble Performance:")
print(f"Ensemble CV AUC: {ensemble_score:.4f}")
print(f"Improvement:     {improvement:+.4f} over best single model")

if improvement > 0:
    print("✅ Ensemble improves performance!")
else:
    print("⚠️  Ensemble doesn't improve - consider using LightGBM only")
In [ ]:
# Save final ensemble results
if improvement > 0:
    # Use ensemble if it improves
    final_predictions = ensemble_test
    method_used = "Weighted Ensemble"
    final_score = ensemble_score
else:
    # Use LightGBM if ensemble doesn't help
    final_predictions = y_probs
    method_used = "LightGBM Only"
    final_score = lgb_score

# Save main submission
final_submission = pd.DataFrame({
    'id': test.id,
    'y': final_predictions
})

final_submission.to_csv('final_3model_ensemble.csv', index=False)
print(f"Final submission saved as 'final_3model_ensemble.csv'")
print(f"Method used: {method_used}")
print(f"CV AUC: {final_score:.4f}")

# Also save all predictions for comparison
all_predictions = pd.DataFrame({
    'id': test.id,
    'lightgbm': y_probs,
    'catboost_tuned': y_probs_cat_tuned,
    'xgboost_tuned': y_probs_xgb_tuned,
    'weighted_ensemble': ensemble_test
})

all_predictions.to_csv('all_model_predictions.csv', index=False)
print("All model predictions saved as 'all_model_predictions.csv'")

print(f"\n🏆 FINAL RESULTS:")
print(f"{'='*50}")
print(f"Best method: {method_used}")
print(f"Final CV AUC: {final_score:.4f}")
if improvement > 0:
    print(f"Improvement: +{improvement:.4f} over LightGBM")
else:
    print("Ensemble didn't improve - using single model")
print(f"{'='*50}")