#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# =====================
# General utilities
# =====================
import json
import os
import pickle
import time
from collections import Counter

# =====================
# Data handling & processing
# =====================
import numpy as np
import pandas as pd
from tqdm import tqdm

# =====================
# Visualization
# =====================
import matplotlib.pyplot as plt
import seaborn as sns

# =====================
# Machine Learning - Core scikit-learn
# =====================
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2, mutual_info_classif
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    mean_absolute_error, mean_squared_error, r2_score,
    root_mean_squared_error, roc_auc_score
)
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler
from sklearn.svm import SVC, SVR

# =====================
# Machine Learning - Tree Boosting & advanced
# =====================
import xgboost as xg
import lightgbm as lgb
import catboost as cb

# =====================
# Deep Learning - TensorFlow / Keras
# =====================
import tensorflow as tf
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

# =====================
# Imbalanced data handling
# =====================
from imblearn.over_sampling import SMOTE

# =====================
# Optimization / AutoML
# =====================
import optuna

# =====================
# Feature importance & explainability
# =====================
import shap

# =====================
# Self Made Utilities
# =====================
from utils import *

# =====================
# Settings & reproducibility
# =====================
import warnings

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings(action="ignore")

# One seed shared by the notebook; applied here to NumPy's global RNG.
SEED = 42
np.random.seed(seed=SEED)

print("Libraries successfully loaded. Ready to go!")

Libraries successfully loaded. Ready to go!

# Load the competition data from the working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# The target is named first so the feature list can be derived from it;
# the row identifier and the target itself are not predictors.
target_col = 'BeatsPerMinute'
feature_cols = [name for name in train.columns if name not in ('id', target_col)]

print(f"\n{'=' * 50}")
print("🔗 CORRELATION ANALYSIS")
print("=" * 50)

# Pairwise correlations between all predictors and the target
correlation_matrix = train[[*feature_cols, target_col]].corr()

# Heatmap of the lower triangle only: the upper triangle (including the
# diagonal) is masked out because it mirrors the lower one.
plt.figure(figsize=(12, 10))
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
sns.heatmap(
    correlation_matrix,
    mask=mask,
    annot=True,
    fmt='.3f',
    cmap='RdBu_r',
    center=0,
    square=True,
    cbar_kws={"shrink": 0.8},
)
plt.title('🔗 Feature Correlation Matrix', fontsize=14, pad=20)
plt.tight_layout()
plt.show()

# Rank predictors by the absolute value of their correlation with the target
target_correlations = (
    correlation_matrix[target_col]
    .drop(target_col)
    .abs()
    .sort_values(ascending=False)
)
print("\n🎯 Features ranked by correlation with BPM:")
for rank, (feature, corr) in enumerate(target_correlations.items(), start=1):
    print(f"{rank:2d}. {feature:<25} | Correlation: {corr:.4f}")

==================================================
🔗 CORRELATION ANALYSIS
==================================================

🎯 Features ranked by correlation with BPM:
 1. MoodScore                 | Correlation: 0.0071
 2. TrackDurationMs           | Correlation: 0.0066
 3. RhythmScore               | Correlation: 0.0054
 4. VocalContent              | Correlation: 0.0049
 5. Energy                    | Correlation: 0.0044
 6. LivePerformanceLikelihood | Correlation: 0.0035
 7. AudioLoudness             | Correlation: 0.0033
 8. InstrumentalScore         | Correlation: 0.0019
 9. AcousticQuality           | Correlation: 0.0008

# Section banner for the feature-engineering stage
print(f"\n{'=' * 50}")
print("🧮 FEATURE ENGINEERING")
print("=" * 50)

def create_features(df, top_features=None):
    """Return a copy of ``df`` with additional engineered features for BPM prediction.

    The input frame is not modified; all new columns are added to a copy.

    Args:
        df: DataFrame containing the columns ``RhythmScore``, ``Energy``,
            ``AudioLoudness``, ``VocalContent``, ``InstrumentalScore``,
            ``TrackDurationMs``, ``MoodScore``, ``AcousticQuality`` and
            ``LivePerformanceLikelihood``.
        top_features: Optional iterable of column names that receive
            ``<name>_squared`` and ``<name>_sqrt`` columns. When omitted, the
            three highest-ranked entries of the module-level
            ``target_correlations`` Series are used (the original behaviour),
            which requires the correlation analysis to have run first.

    Returns:
        A new DataFrame holding the original columns plus the engineered ones.

    Raises:
        KeyError: If a required column (or a name in ``top_features``) is
            missing from ``df``.
        NameError: If ``top_features`` is omitted and ``target_correlations``
            has not been defined.
    """
    if top_features is None:
        # Backward-compatible default: rely on the ranking computed earlier
        # in the notebook.
        top_features = target_correlations.head(3).index.tolist()
    else:
        top_features = list(top_features)

    df = df.copy()

    # 1. Rhythm and Energy interactions (epsilon guards against division by zero)
    df['RhythmEnergyProduct'] = df['RhythmScore'] * df['Energy']
    df['RhythmEnergyRatio'] = df['RhythmScore'] / (df['Energy'] + 1e-8)

    # 2. Audio characteristics
    df['LoudnessEnergyProduct'] = df['AudioLoudness'] * df['Energy']
    df['VocalInstrumentalRatio'] = df['VocalContent'] / (df['InstrumentalScore'] + 1e-8)

    # 3. Track duration features
    df['TrackDurationMin'] = df['TrackDurationMs'] / 60000  # Convert to minutes
    df['DurationMoodProduct'] = df['TrackDurationMin'] * df['MoodScore']

    # 4. Performance and quality features
    df['QualityPerformanceProduct'] = df['AcousticQuality'] * df['LivePerformanceLikelihood']

    # 5. Polynomial features for the selected (top correlated) features
    for feature in top_features:
        df[f'{feature}_squared'] = df[feature] ** 2
        # abs() keeps the square root defined for negative inputs
        df[f'{feature}_sqrt'] = np.sqrt(np.abs(df[feature]))

    # 6. Binned features
    # NOTE: with an integer `bins`, pd.cut derives equal-width edges from the
    # min/max of *this* frame, so the edges differ between frames (e.g. train
    # vs. test). The resulting columns are categorical, not numeric.
    bin_labels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
    df['EnergyBin'] = pd.cut(df['Energy'], bins=5, labels=bin_labels)
    df['RhythmBin'] = pd.cut(df['RhythmScore'], bins=5, labels=bin_labels)

    # 7. Interaction between rhythm and tempo-related features
    df['RhythmDurationInteraction'] = df['RhythmScore'] * df['TrackDurationMin']

    # 8. Log transformations
    df['LogTrackDuration'] = np.log1p(df['TrackDurationMs'])
    # log1p is already defined at 0; the epsilon is kept only so the produced
    # values stay identical to earlier runs.
    df['LogLoudness'] = np.log1p(np.abs(df['AudioLoudness']) + 1e-8)

    # Clustering-based features (KMeans over Energy / RhythmScore /
    # AudioLoudness / VocalContent) were tried here previously and are
    # currently disabled.

    return df
    
# Run the same feature pipeline over both data splits
train_engineered = create_features(train)
test_engineered = create_features(test)

# Anything present after engineering that the raw training frame lacked is new
new_features = [
    column
    for column in train_engineered.columns
    if column not in train.columns
]
print(f"✨ Created {len(new_features)} new features:")
for feature in new_features:
    print(f"   • {feature}")

==================================================
🧮 FEATURE ENGINEERING
==================================================
✨ Created 23 new features:
   • RhythmEnergyProduct
   • RhythmEnergyRatio
   • LoudnessEnergyProduct
   • VocalInstrumentalRatio
   • TrackDurationMin
   • DurationMoodProduct
   • QualityPerformanceProduct
   • MoodScore_squared
   • MoodScore_sqrt
   • TrackDurationMs_squared
   • TrackDurationMs_sqrt
   • RhythmScore_squared
   • RhythmScore_sqrt
   • EnergyBin
   • RhythmBin
   • RhythmDurationInteraction
   • LogTrackDuration
   • LogLoudness
   • FeatureMean
   • FeatureStd
   • FeatureSkew
   • Energy_Loudness_Ratio
   • Rhythm_Duration_Normalized

# Model inputs: every numeric engineered column except the row identifier and
# the target. (Previously this list was overwritten by a second assignment
# that only excluded the target, which let 'id' leak in as a feature.)
numerical_features = train_engineered.select_dtypes(include=[np.number]).columns.tolist()
feature_columns = [col for col in numerical_features if col not in ['id', target_col]]

# Prepare features for modeling
X = train_engineered[feature_columns]
y = train_engineered[target_col]

print(f"📊 Training with {len(feature_columns)} features")
print(f"🎯 Target variable: {target_col}")

# Hold out 20% of the rows for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Reuse the already-engineered test frame instead of recomputing it
X_test = test_engineered[feature_columns]

# Scale the features: statistics are fitted on the training split only and
# then applied to the validation split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

📊 Training with 31 features
🎯 Target variable: BeatsPerMinute

def objective_xgboost(trial):
    """Optuna objective: mean 5-fold CV RMSE of an XGBoost regressor.

    Hyperparameters are sampled from ``trial``; the model is trained and
    scored on the module-level ``X`` / ``y`` using a shuffled 5-fold split.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The mean RMSE across the five validation folds (lower is better).

    Notes:
        * Training is configured for a CUDA device.
        * The fold used for early stopping is also the fold that is scored,
          so the reported RMSE may be slightly optimistic.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.5, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 0.9),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.1, 2.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1.0, 5.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'random_state': 42,
        'eval_metric': 'rmse',
        'early_stopping_rounds': 100,
        'verbosity': 0,

        # GPU acceleration parameters.
        # 'gpu_hist' is deprecated in releases that accept `device`; the
        # supported spelling is the histogram method plus a CUDA device.
        'tree_method': 'hist',
        'device': 'cuda',
    }

    cv_scores = []
    kf_tune = KFold(n_splits=5, shuffle=True, random_state=42)

    for train_idx, val_idx in kf_tune.split(X, y):
        X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
        X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

        model = xg.XGBRegressor(**params)
        # eval_set feeds the early-stopping monitor configured in `params`
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)], verbose=False)

        pred = model.predict(X_val_fold)
        cv_scores.append(root_mean_squared_error(y_val_fold, pred))

    return np.mean(cv_scores)

print("➗ Tuning XGBoost parameters...")

# Seeded TPE sampler so the sequence of suggested trials is repeatable
study_xgboost = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=42),
    direction='minimize',
)
study_xgboost.optimize(objective_xgboost, n_trials=50)

# Keep the winning sampled hyperparameters for later use
best_xgboost_params = study_xgboost.best_params
print(f"Best XGBoost CV RMSE: {study_xgboost.best_value:.4f}")
print(f"Best XGBoost params: {best_xgboost_params}")

[I 2025-09-24 13:50:25,120] A new study created in memory with name: no-name-91f84a8a-4401-4910-b4ee-e4ce14827a66

➗ Tuning XGBoost parameters...

[I 2025-09-24 13:50:32,365] Trial 0 finished with value: 26.462705197382366 and parameters: {'n_estimators': 193, 'learning_rate': 0.09556428757689246, 'max_depth': 7, 'min_child_weight': 5, 'subsample': 0.5624074561769746, 'colsample_bytree': 0.562397808134481, 'reg_alpha': 0.21035886311957896, 'reg_lambda': 4.46470458309974, 'gamma': 0.6011150117432088}. Best is trial 0 with value: 26.462705197382366.
[I 2025-09-24 13:50:45,336] Trial 1 finished with value: 26.46111143494931 and parameters: {'n_estimators': 357, 'learning_rate': 0.011852604486622221, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.5849356442713105, 'colsample_bytree': 0.5727299868828403, 'reg_alpha': 0.4484685687215243, 'reg_lambda': 2.216968971838151, 'gamma': 0.5247564316322378}. Best is trial 1 with value: 26.46111143494931.
[I 2025-09-24 13:50:52,251] Trial 2 finished with value: 26.460444043021916 and parameters: {'n_estimators': 222, 'learning_rate': 0.036210622617823776, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.6168578594140872, 'colsample_bytree': 0.6465447373174766, 'reg_alpha': 0.9665329700123682, 'reg_lambda': 4.140703845572054, 'gamma': 0.19967378215835974}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:50:57,780] Trial 3 finished with value: 26.460464905338586 and parameters: {'n_estimators': 262, 'learning_rate': 0.06331731119758383, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5682096494749166, 'colsample_bytree': 0.5260206371941119, 'reg_alpha': 1.9028825207813331, 'reg_lambda': 4.862528132298237, 'gamma': 0.8083973481164611}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:06,438] Trial 4 finished with value: 26.460774262940554 and parameters: {'n_estimators': 159, 'learning_rate': 0.018790490260574548, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.5488152939379115, 'colsample_bytree': 0.6980707640445081, 'reg_alpha': 0.16533819011891496, 'reg_lambda': 4.637281608315128, 'gamma': 0.2587799816000169}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:12,929] Trial 5 finished with value: 26.460761069452126 and parameters: {'n_estimators': 335, 'learning_rate': 0.038053996848046986, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.5739417822102109, 'colsample_bytree': 0.8878338511058235, 'reg_alpha': 1.5727523643861177, 'reg_lambda': 4.757995766256757, 'gamma': 0.8948273504276488}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:18,275] Trial 6 finished with value: 26.46077790449839 and parameters: {'n_estimators': 303, 'learning_rate': 0.09296868115208053, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.5180909155642153, 'colsample_bytree': 0.6301321323053057, 'reg_alpha': 0.8384868504100158, 'reg_lambda': 2.0853961270955836, 'gamma': 0.8287375091519293}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:25,191] Trial 7 finished with value: 26.46103166897681 and parameters: {'n_estimators': 185, 'learning_rate': 0.03528410587186427, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8208787923016159, 'colsample_bytree': 0.5298202574719083, 'reg_alpha': 1.975085179540983, 'reg_lambda': 4.08897907718663, 'gamma': 0.1987156815341724}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:28,101] Trial 8 finished with value: 26.462743251022072 and parameters: {'n_estimators': 12, 'learning_rate': 0.08339152856093508, 'max_depth': 7, 'min_child_weight': 6, 'subsample': 0.8085081386743783, 'colsample_bytree': 0.5296178606936361, 'reg_alpha': 0.7810848842341179, 'reg_lambda': 1.4634762381005189, 'gamma': 0.8631034258755935}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:34,123] Trial 9 finished with value: 26.460733879913032 and parameters: {'n_estimators': 316, 'learning_rate': 0.03978082223673843, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.6300733288106988, 'colsample_bytree': 0.7918424713352257, 'reg_alpha': 1.3113591955749049, 'reg_lambda': 4.548850970305306, 'gamma': 0.4722149251619493}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:39,849] Trial 10 finished with value: 26.4608360054537 and parameters: {'n_estimators': 474, 'learning_rate': 0.060798328645988865, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.7010124870699186, 'colsample_bytree': 0.7084932353340201, 'reg_alpha': 1.149171357536858, 'reg_lambda': 3.496334108689461, 'gamma': 0.039907535719484855}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:44,351] Trial 11 finished with value: 26.460508868110384 and parameters: {'n_estimators': 96, 'learning_rate': 0.06712185697042802, 'max_depth': 4, 'min_child_weight': 7, 'subsample': 0.6737425087025646, 'colsample_bytree': 0.6375994851305036, 'reg_alpha': 1.8942038075472458, 'reg_lambda': 3.4287370961329025, 'gamma': 0.6766034627526838}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:50,051] Trial 12 finished with value: 26.46045414542298 and parameters: {'n_estimators': 251, 'learning_rate': 0.052709692658491805, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.7426740033019517, 'colsample_bytree': 0.6413584553307843, 'reg_alpha': 1.5151366638571864, 'reg_lambda': 3.724682043951349, 'gamma': 0.33090752060989614}. Best is trial 2 with value: 26.460444043021916.
[I 2025-09-24 13:51:56,211] Trial 13 finished with value: 26.460437580637556 and parameters: {'n_estimators': 444, 'learning_rate': 0.047107378789851655, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.7455551257989088, 'colsample_bytree': 0.6987586652315626, 'reg_alpha': 1.4604256314489064, 'reg_lambda': 3.7529180478007973, 'gamma': 0.3307022691734249}. Best is trial 13 with value: 26.460437580637556.
[I 2025-09-24 13:52:03,128] Trial 14 finished with value: 26.460649917410233 and parameters: {'n_estimators': 488, 'learning_rate': 0.027790835621382525, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8987403117469792, 'colsample_bytree': 0.7604911724169197, 'reg_alpha': 0.8888925710902702, 'reg_lambda': 2.8486310001742448, 'gamma': 0.050033752002234644}. Best is trial 13 with value: 26.460437580637556.
[I 2025-09-24 13:52:08,726] Trial 15 finished with value: 26.460581850327024 and parameters: {'n_estimators': 400, 'learning_rate': 0.049382695481741824, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.7561711433906496, 'colsample_bytree': 0.7088506916345483, 'reg_alpha': 1.3677037821910694, 'reg_lambda': 2.8774607876222316, 'gamma': 0.38783899472885225}. Best is trial 13 with value: 26.460437580637556.
[I 2025-09-24 13:52:15,033] Trial 16 finished with value: 26.46124000876423 and parameters: {'n_estimators': 415, 'learning_rate': 0.04753132614308796, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.631194128587385, 'colsample_bytree': 0.8003988944050103, 'reg_alpha': 0.5781820633718393, 'reg_lambda': 4.062978899982637, 'gamma': 0.16012336387635384}. Best is trial 13 with value: 26.460437580637556.
[I 2025-09-24 13:52:19,937] Trial 17 finished with value: 26.460434839877433 and parameters: {'n_estimators': 114, 'learning_rate': 0.02655808404967365, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.6559481420014495, 'colsample_bytree': 0.6721559487206559, 'reg_alpha': 1.0893867862908664, 'reg_lambda': 3.998566891806512, 'gamma': 0.3745544654214472}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:24,721] Trial 18 finished with value: 26.460785421093437 and parameters: {'n_estimators': 110, 'learning_rate': 0.022992405044187987, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.7011650549716373, 'colsample_bytree': 0.8608121237579316, 'reg_alpha': 1.655637992356045, 'reg_lambda': 3.301778313249126, 'gamma': 0.3992946483188063}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:27,449] Trial 19 finished with value: 26.462110635174707 and parameters: {'n_estimators': 12, 'learning_rate': 0.07245362426322442, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.7775605636050232, 'colsample_bytree': 0.7521072896918344, 'reg_alpha': 1.1690910148949127, 'reg_lambda': 2.5022773472254523, 'gamma': 0.618102976483014}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:32,133] Trial 20 finished with value: 26.46176699855722 and parameters: {'n_estimators': 80, 'learning_rate': 0.010839728938490965, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8712411472931488, 'colsample_bytree': 0.5960313971494949, 'reg_alpha': 1.7210920816295872, 'reg_lambda': 1.046483846510213, 'gamma': 0.9938427850497087}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:39,097] Trial 21 finished with value: 26.460611390050115 and parameters: {'n_estimators': 135, 'learning_rate': 0.030753170938854987, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.6519888012483408, 'colsample_bytree': 0.6834180073909231, 'reg_alpha': 1.0206505377417825, 'reg_lambda': 4.02395643192226, 'gamma': 0.29557964976826007}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:45,401] Trial 22 finished with value: 26.46099029378185 and parameters: {'n_estimators': 229, 'learning_rate': 0.0458009975152692, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6123309056025085, 'colsample_bytree': 0.6558991585520674, 'reg_alpha': 1.4121352810296501, 'reg_lambda': 3.790194097783015, 'gamma': 0.13216827715702567}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:49,805] Trial 23 finished with value: 26.461173302736977 and parameters: {'n_estimators': 63, 'learning_rate': 0.022965770789399542, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.725406154056739, 'colsample_bytree': 0.7401118174643373, 'reg_alpha': 1.079471062523167, 'reg_lambda': 4.259308962023541, 'gamma': 0.4441819536388816}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:52:59,632] Trial 24 finished with value: 26.46239525550933 and parameters: {'n_estimators': 197, 'learning_rate': 0.04196688687197312, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.6707145335615762, 'colsample_bytree': 0.6049387404105057, 'reg_alpha': 0.6700013896733816, 'reg_lambda': 3.7163137242504054, 'gamma': 0.26237966296059556}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:07,210] Trial 25 finished with value: 26.460918015082388 and parameters: {'n_estimators': 437, 'learning_rate': 0.03192787951723773, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.6082511538557175, 'colsample_bytree': 0.6611147829004439, 'reg_alpha': 1.2527656186858227, 'reg_lambda': 3.245611264282009, 'gamma': 0.3476888816364016}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:13,154] Trial 26 finished with value: 26.46068166105913 and parameters: {'n_estimators': 286, 'learning_rate': 0.05789915271016368, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.6683232274088278, 'colsample_bytree': 0.6738909866114129, 'reg_alpha': 0.9454092399872267, 'reg_lambda': 4.385620425193835, 'gamma': 0.10507033520400777}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:22,868] Trial 27 finished with value: 26.46112745487958 and parameters: {'n_estimators': 361, 'learning_rate': 0.018545531195078853, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.5016427380079551, 'colsample_bytree': 0.7268557414534376, 'reg_alpha': 1.2224780905332415, 'reg_lambda': 4.945498761348853, 'gamma': 0.21752093089729585}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:30,140] Trial 28 finished with value: 26.460581152332605 and parameters: {'n_estimators': 229, 'learning_rate': 0.04268103064092376, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.7852098043303755, 'colsample_bytree': 0.6135509248802381, 'reg_alpha': 1.4518180707082793, 'reg_lambda': 3.81968457333619, 'gamma': 0.5228648596429631}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:37,411] Trial 29 finished with value: 26.461041056760678 and parameters: {'n_estimators': 166, 'learning_rate': 0.055083744809636646, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.7064724090084442, 'colsample_bytree': 0.569346750770864, 'reg_alpha': 0.326987639946527, 'reg_lambda': 4.359353824726453, 'gamma': 0.5838242172953723}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:42,780] Trial 30 finished with value: 26.46064908564339 and parameters: {'n_estimators': 129, 'learning_rate': 0.02646160552320786, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.5373258511602691, 'colsample_bytree': 0.7881569745606399, 'reg_alpha': 0.7360015887655019, 'reg_lambda': 3.1842382665635265, 'gamma': 0.7109547014349408}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:49,594] Trial 31 finished with value: 26.46063454033324 and parameters: {'n_estimators': 223, 'learning_rate': 0.05038650335584005, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.7438896173626358, 'colsample_bytree': 0.6412696220005697, 'reg_alpha': 1.5160746959407518, 'reg_lambda': 3.644157999407305, 'gamma': 0.3062341325688827}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:53:56,740] Trial 32 finished with value: 26.460535586357093 and parameters: {'n_estimators': 261, 'learning_rate': 0.03688706880195039, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.7368904489800351, 'colsample_bytree': 0.681474523285374, 'reg_alpha': 1.7206949261746203, 'reg_lambda': 3.939037372172414, 'gamma': 0.357063030832568}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:03,019] Trial 33 finished with value: 26.46081437728966 and parameters: {'n_estimators': 360, 'learning_rate': 0.05313405888580647, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.6497251322039813, 'colsample_bytree': 0.5806722715911633, 'reg_alpha': 1.02532112925275, 'reg_lambda': 4.251121320433305, 'gamma': 0.43351046166963314}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:09,036] Trial 34 finished with value: 26.46048615232548 and parameters: {'n_estimators': 271, 'learning_rate': 0.07290557407473434, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.7698324574593982, 'colsample_bytree': 0.6214924093557153, 'reg_alpha': 1.6322098821278397, 'reg_lambda': 3.559352137147675, 'gamma': 0.31444864759908164}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:13,190] Trial 35 finished with value: 26.46190063838509 and parameters: {'n_estimators': 44, 'learning_rate': 0.016421111024305962, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.8275803495636975, 'colsample_bytree': 0.6571571340528016, 'reg_alpha': 0.504228447900329, 'reg_lambda': 3.063004065078422, 'gamma': 0.1977069372733152}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:22,088] Trial 36 finished with value: 26.462792933314137 and parameters: {'n_estimators': 204, 'learning_rate': 0.0642654506322954, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.7278835468798958, 'colsample_bytree': 0.7153129970677747, 'reg_alpha': 1.7811869803642715, 'reg_lambda': 4.5224890431839135, 'gamma': 0.2507630563923304}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:30,333] Trial 37 finished with value: 26.46082070126686 and parameters: {'n_estimators': 160, 'learning_rate': 0.03333127670342818, 'max_depth': 7, 'min_child_weight': 5, 'subsample': 0.5943432890911258, 'colsample_bytree': 0.5543805464237439, 'reg_alpha': 1.3098043222233948, 'reg_lambda': 2.6001306767458865, 'gamma': 0.5055155195523469}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:37,279] Trial 38 finished with value: 26.46048701623937 and parameters: {'n_estimators': 332, 'learning_rate': 0.046660612147591374, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6848714318156846, 'colsample_bytree': 0.6908496085431133, 'reg_alpha': 1.559967389056257, 'reg_lambda': 4.173955313282764, 'gamma': 0.5754907137289896}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:44,156] Trial 39 finished with value: 26.46054608462658 and parameters: {'n_estimators': 289, 'learning_rate': 0.04180071348497771, 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.557711078402639, 'colsample_bytree': 0.5037060479627069, 'reg_alpha': 1.4716145681470432, 'reg_lambda': 4.709700985363039, 'gamma': 0.34703947490532294}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:51,373] Trial 40 finished with value: 26.461422027428874 and parameters: {'n_estimators': 394, 'learning_rate': 0.055725782858475015, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.8062664219963813, 'colsample_bytree': 0.5884798608049184, 'reg_alpha': 0.10275502240623158, 'reg_lambda': 3.8314410968529655, 'gamma': 0.16835731396150172}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:54:57,463] Trial 41 finished with value: 26.46044995752797 and parameters: {'n_estimators': 240, 'learning_rate': 0.07957068466651573, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5751985819404787, 'colsample_bytree': 0.5301410525479905, 'reg_alpha': 1.874013866201114, 'reg_lambda': 4.934399292094538, 'gamma': 0.7902348147104918}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:03,352] Trial 42 finished with value: 26.46088052335964 and parameters: {'n_estimators': 247, 'learning_rate': 0.09303913977339354, 'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.6309053087060956, 'colsample_bytree': 0.550636605911671, 'reg_alpha': 1.9177325572338255, 'reg_lambda': 4.74843717695186, 'gamma': 0.7483905359601756}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:09,947] Trial 43 finished with value: 26.460633805305598 and parameters: {'n_estimators': 180, 'learning_rate': 0.07596558614220013, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5791847850149892, 'colsample_bytree': 0.6413315119267838, 'reg_alpha': 1.9912214602896283, 'reg_lambda': 4.530917087977352, 'gamma': 0.4140101492807363}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:15,568] Trial 44 finished with value: 26.46076712908486 and parameters: {'n_estimators': 324, 'learning_rate': 0.08897952307099494, 'max_depth': 4, 'min_child_weight': 6, 'subsample': 0.5296291371341685, 'colsample_bytree': 0.6625549205694227, 'reg_alpha': 1.1325524051420681, 'reg_lambda': 4.817087922961513, 'gamma': 0.08753823078932124}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:23,042] Trial 45 finished with value: 26.46060313630529 and parameters: {'n_estimators': 242, 'learning_rate': 0.03679235411875548, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.6014876448092319, 'colsample_bytree': 0.6996566057735346, 'reg_alpha': 1.8257218069529464, 'reg_lambda': 3.420512457969189, 'gamma': 0.9346869707951309}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:28,501] Trial 46 finished with value: 26.46093273305481 and parameters: {'n_estimators': 207, 'learning_rate': 0.09848999628901034, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.6502470269910693, 'colsample_bytree': 0.7253043370425059, 'reg_alpha': 0.9382358830312683, 'reg_lambda': 2.081370664453408, 'gamma': 0.47102952329203046}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:34,027] Trial 47 finished with value: 26.460931392675725 and parameters: {'n_estimators': 138, 'learning_rate': 0.0820495790607113, 'max_depth': 5, 'min_child_weight': 7, 'subsample': 0.5607791105814031, 'colsample_bytree': 0.5035363853237822, 'reg_alpha': 1.3632281710726937, 'reg_lambda': 4.060939091702034, 'gamma': 0.79021337876166}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:40,567] Trial 48 finished with value: 26.461089798089933 and parameters: {'n_estimators': 299, 'learning_rate': 0.05997535160242087, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.7109034105186339, 'colsample_bytree': 0.7707075467647746, 'reg_alpha': 0.8465074695322424, 'reg_lambda': 4.9673165135794095, 'gamma': 0.23151746919069083}. Best is trial 17 with value: 26.460434839877433.
[I 2025-09-24 13:55:49,438] Trial 49 finished with value: 26.46021859353366 and parameters: {'n_estimators': 457, 'learning_rate': 0.026795109277633986, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.755008650200686, 'colsample_bytree': 0.5377692956708234, 'reg_alpha': 1.6209567175589383, 'reg_lambda': 3.901897324133343, 'gamma': 0.2927907759783605}. Best is trial 49 with value: 26.46021859353366.

Best XGBoost CV RMSE: 26.4602
Best XGBoost params: {'n_estimators': 457, 'learning_rate': 0.026795109277633986, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.755008650200686, 'colsample_bytree': 0.5377692956708234, 'reg_alpha': 1.6209567175589383, 'reg_lambda': 3.901897324133343, 'gamma': 0.2927907759783605}

def objective_lightgbm(trial):
    """Optuna objective: mean 5-fold CV RMSE of a LightGBM regressor.

    Samples one hyperparameter configuration from ``trial``, fits an
    ``lgb.LGBMRegressor`` on each training split of the module-level
    ``X`` / ``y`` (both indexed positionally with ``.iloc``), and scores the
    matching held-out fold with ``root_mean_squared_error``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over the five validation folds (lower is better).
    """
    # LightGBM-specific parameter space for regression
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'num_leaves': trial.suggest_int('num_leaves', 31, 127),
        'min_child_samples': trial.suggest_int('min_child_samples', 20, 100),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        # LightGBM only performs row subsampling when the bagging frequency is
        # non-zero (the default 0 disables it), so without this entry the
        # tuned `subsample` value had no effect on the model. Sampling it
        # through the trial also records it in `study.best_params`, so code
        # that rebuilds a model from the best params receives it as well.
        'subsample_freq': trial.suggest_int('subsample_freq', 1, 7),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 2.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 2.0),
        'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 1.0),
        # Fixed (non-tuned) settings.
        'random_state': 42,
        'objective': 'regression',
        'metric': 'rmse',
        'verbosity': -1,
        'early_stopping_rounds': 100,

        # Train on the first GPU of the first platform.
        'device': 'gpu',
        'gpu_platform_id': 0,
        'gpu_device_id': 0,
    }

    cv_scores = []
    # Same seeded, shuffled split for every trial so trials are comparable.
    kf_tune = KFold(n_splits=5, shuffle=True, random_state=42)

    for train_idx, val_idx in kf_tune.split(X, y):
        X_train_fold, y_train_fold = X.iloc[train_idx], y.iloc[train_idx]
        X_val_fold, y_val_fold = X.iloc[val_idx], y.iloc[val_idx]

        model = lgb.LGBMRegressor(**params)
        # NOTE: the validation fold is both the early-stopping monitor and
        # the scoring set below, so the reported RMSE is somewhat optimistic.
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)])

        pred = model.predict(X_val_fold)
        score = root_mean_squared_error(y_val_fold, pred)
        cv_scores.append(score)

    return np.mean(cv_scores)

# Seeded 50-trial TPE search minimizing the value returned by objective_lightgbm.
print("Tuning LightGBM parameters...")
study_lightgbm = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=42),
    direction='minimize',
)
study_lightgbm.optimize(func=objective_lightgbm, n_trials=50)

# Keep the winning configuration in a module-level name, then report it.
best_lightgbm_params = study_lightgbm.best_params
print(f"Best LightGBM CV RMSE: {study_lightgbm.best_value:.4f}")
print(f"Best LightGBM params: {best_lightgbm_params}")

[I 2025-09-24 13:55:49,469] A new study created in memory with name: no-name-36f412fe-edbf-46e8-bf4d-8abe7c3acdfc

Tuning LightGBM parameters...

[I 2025-09-24 13:56:01,665] Trial 0 finished with value: 26.461082498339245 and parameters: {'n_estimators': 193, 'learning_rate': 0.09556428757689246, 'max_depth': 7, 'num_leaves': 89, 'min_child_samples': 32, 'subsample': 0.7467983561008608, 'colsample_bytree': 0.7174250836504598, 'reg_alpha': 1.7323522915498704, 'reg_lambda': 1.2022300234864176, 'min_split_gain': 0.7080725777960455}. Best is trial 0 with value: 26.461082498339245.
[I 2025-09-24 13:56:05,878] Trial 1 finished with value: 26.460307482752633 and parameters: {'n_estimators': 20, 'learning_rate': 0.0972918866945795, 'max_depth': 7, 'num_leaves': 51, 'min_child_samples': 34, 'subsample': 0.7550213529560301, 'colsample_bytree': 0.7912726728878613, 'reg_alpha': 1.0495128632644757, 'reg_lambda': 0.8638900372842315, 'min_split_gain': 0.2912291401980419}. Best is trial 1 with value: 26.460307482752633.
[I 2025-09-24 13:56:15,595] Trial 2 finished with value: 26.459609503714013 and parameters: {'n_estimators': 310, 'learning_rate': 0.022554447458683766, 'max_depth': 4, 'num_leaves': 66, 'min_child_samples': 56, 'subsample': 0.935552788417904, 'colsample_bytree': 0.7599021346475079, 'reg_alpha': 1.0284688768272232, 'reg_lambda': 1.184829137724085, 'min_split_gain': 0.046450412719997725}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:56:24,252] Trial 3 finished with value: 26.46006184012337 and parameters: {'n_estimators': 308, 'learning_rate': 0.02534717113185624, 'max_depth': 3, 'num_leaves': 123, 'min_child_samples': 98, 'subsample': 0.9425192044349383, 'colsample_bytree': 0.7913841307520112, 'reg_alpha': 0.19534422801276774, 'reg_lambda': 1.3684660530243138, 'min_split_gain': 0.4401524937396013}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:56:28,779] Trial 4 finished with value: 26.460165188136834 and parameters: {'n_estimators': 69, 'learning_rate': 0.054565921910014324, 'max_depth': 3, 'num_leaves': 119, 'min_child_samples': 40, 'subsample': 0.8987566853061946, 'colsample_bytree': 0.7935133228268233, 'reg_alpha': 1.0401360423556216, 'reg_lambda': 1.0934205586865593, 'min_split_gain': 0.18485445552552704}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:56:42,449] Trial 5 finished with value: 26.46087233421075 and parameters: {'n_estimators': 486, 'learning_rate': 0.07976195410250031, 'max_depth': 8, 'num_leaves': 117, 'min_child_samples': 68, 'subsample': 0.976562270506935, 'colsample_bytree': 0.7265477506155759, 'reg_alpha': 0.3919657248382904, 'reg_lambda': 0.09045457782107613, 'min_split_gain': 0.32533033076326434}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:56:54,748] Trial 6 finished with value: 26.45987301132029 and parameters: {'n_estimators': 200, 'learning_rate': 0.034421412859650634, 'max_depth': 7, 'num_leaves': 65, 'min_child_samples': 42, 'subsample': 0.8628088249474746, 'colsample_bytree': 0.7422772674924287, 'reg_alpha': 1.6043939615080793, 'reg_lambda': 0.14910128735954165, 'min_split_gain': 0.9868869366005173}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:57:02,843] Trial 7 finished with value: 26.460040513067224 and parameters: {'n_estimators': 389, 'learning_rate': 0.027884411338075517, 'max_depth': 3, 'num_leaves': 110, 'min_child_samples': 77, 'subsample': 0.9187021504122962, 'colsample_bytree': 0.9313811040057838, 'reg_alpha': 0.14808930346818072, 'reg_lambda': 0.7169314570885452, 'min_split_gain': 0.11586905952512971}. Best is trial 2 with value: 26.459609503714013.
[I 2025-09-24 13:57:09,924] Trial 8 finished with value: 26.459331263213898 and parameters: {'n_estimators': 433, 'learning_rate': 0.06609683141448022, 'max_depth': 4, 'num_leaves': 37, 'min_child_samples': 45, 'subsample': 0.7975549966080241, 'colsample_bytree': 0.9188818535014192, 'reg_alpha': 1.2751149427104262, 'reg_lambda': 1.774425485152653, 'min_split_gain': 0.4722149251619493}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:57:17,741] Trial 9 finished with value: 26.4605478359366 and parameters: {'n_estimators': 68, 'learning_rate': 0.07419203085006955, 'max_depth': 7, 'num_leaves': 85, 'min_child_samples': 82, 'subsample': 0.8481386789093173, 'colsample_bytree': 0.8568198488145982, 'reg_alpha': 0.8550820367170993, 'reg_lambda': 0.05083825348819038, 'min_split_gain': 0.10789142699330445}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:57:26,978] Trial 10 finished with value: 26.46043398882727 and parameters: {'n_estimators': 490, 'learning_rate': 0.051111962402468944, 'max_depth': 5, 'num_leaves': 32, 'min_child_samples': 22, 'subsample': 0.8031602632339044, 'colsample_bytree': 0.9883258047335242, 'reg_alpha': 1.3799496133162217, 'reg_lambda': 1.9395803434674757, 'min_split_gain': 0.6437127366238236}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:57:42,542] Trial 11 finished with value: 26.45968288408401 and parameters: {'n_estimators': 362, 'learning_rate': 0.010814705320464075, 'max_depth': 5, 'num_leaves': 34, 'min_child_samples': 56, 'subsample': 0.812066311436242, 'colsample_bytree': 0.8989048309881961, 'reg_alpha': 0.6348647915359373, 'reg_lambda': 1.772605589619047, 'min_split_gain': 0.016130444295014812}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:57:49,642] Trial 12 finished with value: 26.459498750075614 and parameters: {'n_estimators': 398, 'learning_rate': 0.07205829410085779, 'max_depth': 4, 'num_leaves': 57, 'min_child_samples': 57, 'subsample': 0.7100757654803416, 'colsample_bytree': 0.9388238851695223, 'reg_alpha': 1.9920330634948704, 'reg_lambda': 1.4476758028057264, 'min_split_gain': 0.6007406467577833}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:57:56,606] Trial 13 finished with value: 26.45938533725329 and parameters: {'n_estimators': 409, 'learning_rate': 0.07108409603631247, 'max_depth': 4, 'num_leaves': 48, 'min_child_samples': 48, 'subsample': 0.7069316338463831, 'colsample_bytree': 0.9640868078057971, 'reg_alpha': 1.93067589213091, 'reg_lambda': 1.605142391634072, 'min_split_gain': 0.6178601944677584}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:03,664] Trial 14 finished with value: 26.459338996495745 and parameters: {'n_estimators': 440, 'learning_rate': 0.06376163016941724, 'max_depth': 4, 'num_leaves': 47, 'min_child_samples': 45, 'subsample': 0.7050649519253404, 'colsample_bytree': 0.9986907013037633, 'reg_alpha': 1.3723432180704438, 'reg_lambda': 1.6568816770664074, 'min_split_gain': 0.8145901286801429}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:12,407] Trial 15 finished with value: 26.460466984543956 and parameters: {'n_estimators': 450, 'learning_rate': 0.04674468236268003, 'max_depth': 5, 'num_leaves': 44, 'min_child_samples': 20, 'subsample': 0.7726353422711295, 'colsample_bytree': 0.9973853524221147, 'reg_alpha': 1.360728031849036, 'reg_lambda': 1.9328536101542337, 'min_split_gain': 0.8441382814098004}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:22,165] Trial 16 finished with value: 26.45972070307014 and parameters: {'n_estimators': 311, 'learning_rate': 0.06421820079254417, 'max_depth': 6, 'num_leaves': 73, 'min_child_samples': 67, 'subsample': 0.791674795044748, 'colsample_bytree': 0.8846125958144417, 'reg_alpha': 1.354670151510485, 'reg_lambda': 1.6684337217895258, 'min_split_gain': 0.4741933131267933}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:28,787] Trial 17 finished with value: 26.45952766178374 and parameters: {'n_estimators': 445, 'learning_rate': 0.08168308200478944, 'max_depth': 4, 'num_leaves': 40, 'min_child_samples': 47, 'subsample': 0.8408038803290788, 'colsample_bytree': 0.9133377876079447, 'reg_alpha': 1.59685905813277, 'reg_lambda': 0.5917326535264544, 'min_split_gain': 0.8109655341158001}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:35,475] Trial 18 finished with value: 26.45963981172674 and parameters: {'n_estimators': 236, 'learning_rate': 0.06395826566038171, 'max_depth': 4, 'num_leaves': 96, 'min_child_samples': 29, 'subsample': 0.7397847038588629, 'colsample_bytree': 0.9585683482624957, 'reg_alpha': 0.729955597049381, 'reg_lambda': 1.4545454711971273, 'min_split_gain': 0.9807807014852743}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:47,624] Trial 19 finished with value: 26.459951780619313 and parameters: {'n_estimators': 361, 'learning_rate': 0.04209466100640401, 'max_depth': 6, 'num_leaves': 58, 'min_child_samples': 46, 'subsample': 0.8766446003661861, 'colsample_bytree': 0.8343565722978179, 'reg_alpha': 1.240783034692867, 'reg_lambda': 1.9876855700994174, 'min_split_gain': 0.3815341084988876}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:58:54,603] Trial 20 finished with value: 26.460090221327675 and parameters: {'n_estimators': 148, 'learning_rate': 0.08893216927518138, 'max_depth': 3, 'num_leaves': 31, 'min_child_samples': 63, 'subsample': 0.7292493849833972, 'colsample_bytree': 0.8633459903065002, 'reg_alpha': 1.2231478678466554, 'reg_lambda': 0.37702916614606186, 'min_split_gain': 0.8505384066690304}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:01,495] Trial 21 finished with value: 26.45955498125444 and parameters: {'n_estimators': 424, 'learning_rate': 0.06308693494285045, 'max_depth': 4, 'num_leaves': 47, 'min_child_samples': 50, 'subsample': 0.7019825576096854, 'colsample_bytree': 0.9620536246094258, 'reg_alpha': 1.8627223459213296, 'reg_lambda': 1.6344352921218759, 'min_split_gain': 0.5529311940795862}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:09,760] Trial 22 finished with value: 26.459736899406256 and parameters: {'n_estimators': 500, 'learning_rate': 0.07003762009891755, 'max_depth': 5, 'num_leaves': 53, 'min_child_samples': 51, 'subsample': 0.7730530034803582, 'colsample_bytree': 0.9698323994916667, 'reg_alpha': 1.6055556071207766, 'reg_lambda': 1.6738697476786082, 'min_split_gain': 0.7138880255245591}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:16,932] Trial 23 finished with value: 26.45971047953744 and parameters: {'n_estimators': 335, 'learning_rate': 0.05923162101459258, 'max_depth': 4, 'num_leaves': 39, 'min_child_samples': 37, 'subsample': 0.7203988584165296, 'colsample_bytree': 0.9291681549628207, 'reg_alpha': 1.789676805252434, 'reg_lambda': 1.3584223108552134, 'min_split_gain': 0.7372719181994586}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:24,933] Trial 24 finished with value: 26.46003225878435 and parameters: {'n_estimators': 417, 'learning_rate': 0.0835960659249789, 'max_depth': 5, 'num_leaves': 66, 'min_child_samples': 44, 'subsample': 0.7000786336914139, 'colsample_bytree': 0.9705536792722567, 'reg_alpha': 1.4723596872941196, 'reg_lambda': 1.788230033536935, 'min_split_gain': 0.5400944011591632}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:30,548] Trial 25 finished with value: 26.459910264405465 and parameters: {'n_estimators': 270, 'learning_rate': 0.06956187082802678, 'max_depth': 3, 'num_leaves': 47, 'min_child_samples': 28, 'subsample': 0.8271998192286236, 'colsample_bytree': 0.9438857557841556, 'reg_alpha': 1.1917937096189095, 'reg_lambda': 1.5453595458974945, 'min_split_gain': 0.6244117278930565}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:37,612] Trial 26 finished with value: 26.45958372022313 and parameters: {'n_estimators': 450, 'learning_rate': 0.05792209615335231, 'max_depth': 4, 'num_leaves': 40, 'min_child_samples': 74, 'subsample': 0.778930162071706, 'colsample_bytree': 0.9940715071684404, 'reg_alpha': 1.9843299482909569, 'reg_lambda': 1.81822354449189, 'min_split_gain': 0.90484220294817}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:48,859] Trial 27 finished with value: 26.45997892076453 and parameters: {'n_estimators': 377, 'learning_rate': 0.042490857464671364, 'max_depth': 6, 'num_leaves': 77, 'min_child_samples': 52, 'subsample': 0.756554992900205, 'colsample_bytree': 0.9091238829631572, 'reg_alpha': 0.8637609048114747, 'reg_lambda': 0.943023342873055, 'min_split_gain': 0.7709483230566468}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 13:59:55,381] Trial 28 finished with value: 26.459618078265002 and parameters: {'n_estimators': 460, 'learning_rate': 0.08906339202243543, 'max_depth': 4, 'num_leaves': 57, 'min_child_samples': 40, 'subsample': 0.7189999985310273, 'colsample_bytree': 0.8796794581448465, 'reg_alpha': 1.4784360340333058, 'reg_lambda': 1.2621396802378482, 'min_split_gain': 0.40315142018634986}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:03,648] Trial 29 finished with value: 26.459936969544323 and parameters: {'n_estimators': 276, 'learning_rate': 0.07642743745220715, 'max_depth': 5, 'num_leaves': 102, 'min_child_samples': 32, 'subsample': 0.7453585010489386, 'colsample_bytree': 0.9556444767102111, 'reg_alpha': 1.7239572415739641, 'reg_lambda': 1.5077915043654422, 'min_split_gain': 0.6424345466737234}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:09,239] Trial 30 finished with value: 26.460189164104587 and parameters: {'n_estimators': 412, 'learning_rate': 0.09136803154907083, 'max_depth': 3, 'num_leaves': 72, 'min_child_samples': 60, 'subsample': 0.7306349248144017, 'colsample_bytree': 0.8271174420728178, 'reg_alpha': 1.7133700288941425, 'reg_lambda': 1.1898287629230837, 'min_split_gain': 0.517854559333056}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:16,672] Trial 31 finished with value: 26.45971590939925 and parameters: {'n_estimators': 398, 'learning_rate': 0.06963642911699928, 'max_depth': 4, 'num_leaves': 58, 'min_child_samples': 56, 'subsample': 0.700745948157433, 'colsample_bytree': 0.9331791891974955, 'reg_alpha': 1.9864773727730096, 'reg_lambda': 1.5588112227419733, 'min_split_gain': 0.5946105878805008}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:23,744] Trial 32 finished with value: 26.459610445590652 and parameters: {'n_estimators': 346, 'learning_rate': 0.07311373563638746, 'max_depth': 4, 'num_leaves': 50, 'min_child_samples': 36, 'subsample': 0.7492099174310412, 'colsample_bytree': 0.9816460205972959, 'reg_alpha': 1.8949879539811685, 'reg_lambda': 1.8188530426143907, 'min_split_gain': 0.695886805437763}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:31,190] Trial 33 finished with value: 26.45962083354285 and parameters: {'n_estimators': 471, 'learning_rate': 0.06598251048981094, 'max_depth': 4, 'num_leaves': 42, 'min_child_samples': 62, 'subsample': 0.7157952585525321, 'colsample_bytree': 0.9439638085343836, 'reg_alpha': 1.8529175336389776, 'reg_lambda': 1.4002709220842862, 'min_split_gain': 0.3124644291532972}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:37,623] Trial 34 finished with value: 26.4600457199358 and parameters: {'n_estimators': 429, 'learning_rate': 0.051847431807038075, 'max_depth': 3, 'num_leaves': 54, 'min_child_samples': 54, 'subsample': 0.7595600143449905, 'colsample_bytree': 0.9792855609801053, 'reg_alpha': 1.50589856077355, 'reg_lambda': 1.3218925551227685, 'min_split_gain': 0.4691550321834365}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:46,007] Trial 35 finished with value: 26.459709601265114 and parameters: {'n_estimators': 386, 'learning_rate': 0.05874523387285924, 'max_depth': 5, 'num_leaves': 63, 'min_child_samples': 47, 'subsample': 0.7342443618717415, 'colsample_bytree': 0.9182783813200496, 'reg_alpha': 1.1558162665588787, 'reg_lambda': 1.7034925114696544, 'min_split_gain': 0.234222426552057}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:00:52,739] Trial 36 finished with value: 26.459499183780498 and parameters: {'n_estimators': 336, 'learning_rate': 0.07812252539368526, 'max_depth': 4, 'num_leaves': 36, 'min_child_samples': 58, 'subsample': 0.7876128180436971, 'colsample_bytree': 0.9471730688837581, 'reg_alpha': 1.7052769299897406, 'reg_lambda': 0.9984371813386066, 'min_split_gain': 0.5732427905859825}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:02,171] Trial 37 finished with value: 26.45958656295566 and parameters: {'n_estimators': 311, 'learning_rate': 0.09946970103378139, 'max_depth': 8, 'num_leaves': 49, 'min_child_samples': 92, 'subsample': 0.7603962956397491, 'colsample_bytree': 0.700435003537826, 'reg_alpha': 1.0786102811946168, 'reg_lambda': 1.5380271867430058, 'min_split_gain': 0.6668789917703478}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:09,110] Trial 38 finished with value: 26.460092113210237 and parameters: {'n_estimators': 471, 'learning_rate': 0.06772309737705531, 'max_depth': 3, 'num_leaves': 61, 'min_child_samples': 41, 'subsample': 0.9830138544956675, 'colsample_bytree': 0.8913999530982288, 'reg_alpha': 0.4888873224295107, 'reg_lambda': 1.1422635447750533, 'min_split_gain': 0.9188464688439424}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:14,961] Trial 39 finished with value: 26.46009516288475 and parameters: {'n_estimators': 401, 'learning_rate': 0.0837369764469043, 'max_depth': 3, 'num_leaves': 85, 'min_child_samples': 64, 'subsample': 0.9544850250319363, 'colsample_bytree': 0.9994507914109878, 'reg_alpha': 0.9341122188935553, 'reg_lambda': 1.2775243869260942, 'min_split_gain': 0.3876530195886262}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:21,892] Trial 40 finished with value: 26.459470425866463 and parameters: {'n_estimators': 183, 'learning_rate': 0.07376641920326661, 'max_depth': 4, 'num_leaves': 52, 'min_child_samples': 71, 'subsample': 0.7114098794253036, 'colsample_bytree': 0.7728285420471634, 'reg_alpha': 0.037650326956597646, 'reg_lambda': 0.795094422361412, 'min_split_gain': 0.7663209273081535}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:28,394] Trial 41 finished with value: 26.459345294949532 and parameters: {'n_estimators': 159, 'learning_rate': 0.07353471149494296, 'max_depth': 4, 'num_leaves': 53, 'min_child_samples': 73, 'subsample': 0.7128366356473822, 'colsample_bytree': 0.7630491080447548, 'reg_alpha': 0.3268590484156356, 'reg_lambda': 0.7727137010706538, 'min_split_gain': 0.7632552597518815}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:34,769] Trial 42 finished with value: 26.459518282101033 and parameters: {'n_estimators': 153, 'learning_rate': 0.07552866039453277, 'max_depth': 4, 'num_leaves': 45, 'min_child_samples': 76, 'subsample': 0.7269331555546392, 'colsample_bytree': 0.7688946986280618, 'reg_alpha': 0.08888273312390824, 'reg_lambda': 0.7664004825630781, 'min_split_gain': 0.7522528415373898}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:42,506] Trial 43 finished with value: 26.459719971017943 and parameters: {'n_estimators': 155, 'learning_rate': 0.06171411244769154, 'max_depth': 5, 'num_leaves': 36, 'min_child_samples': 82, 'subsample': 0.7139541289035821, 'colsample_bytree': 0.7754506527053163, 'reg_alpha': 0.2762464018153057, 'reg_lambda': 0.5435251766042981, 'min_split_gain': 0.8020945104499154}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:48,593] Trial 44 finished with value: 26.459620920302637 and parameters: {'n_estimators': 115, 'learning_rate': 0.05467476899128837, 'max_depth': 4, 'num_leaves': 52, 'min_child_samples': 71, 'subsample': 0.8942547436800034, 'colsample_bytree': 0.8091377618358396, 'reg_alpha': 0.2482050553854738, 'reg_lambda': 0.608911891153446, 'min_split_gain': 0.9035944316040128}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:54,680] Trial 45 finished with value: 26.460026362528943 and parameters: {'n_estimators': 206, 'learning_rate': 0.07991715496888392, 'max_depth': 3, 'num_leaves': 69, 'min_child_samples': 82, 'subsample': 0.8109822506127368, 'colsample_bytree': 0.7401902875811192, 'reg_alpha': 0.001637517538110339, 'reg_lambda': 0.8434396671447744, 'min_split_gain': 0.6926154217446405}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:01:58,461] Trial 46 finished with value: 26.46003108971695 and parameters: {'n_estimators': 15, 'learning_rate': 0.09438396446105053, 'max_depth': 5, 'num_leaves': 44, 'min_child_samples': 89, 'subsample': 0.7370843886186922, 'colsample_bytree': 0.7537387293475388, 'reg_alpha': 0.44364107973391004, 'reg_lambda': 0.44791245705576493, 'min_split_gain': 0.8334580999296372}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:02:04,170] Trial 47 finished with value: 26.459779573321516 and parameters: {'n_estimators': 109, 'learning_rate': 0.08558210581196053, 'max_depth': 4, 'num_leaves': 37, 'min_child_samples': 80, 'subsample': 0.7638644271152439, 'colsample_bytree': 0.8003034526604768, 'reg_alpha': 0.3343477453796575, 'reg_lambda': 0.745917022188623, 'min_split_gain': 0.7850174177991242}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:02:12,231] Trial 48 finished with value: 26.459475168113293 and parameters: {'n_estimators': 181, 'learning_rate': 0.07324539342161948, 'max_depth': 5, 'num_leaves': 53, 'min_child_samples': 68, 'subsample': 0.7112558314959422, 'colsample_bytree': 0.7269278081695616, 'reg_alpha': 0.7285821211397439, 'reg_lambda': 1.056510741566331, 'min_split_gain': 0.876594845481665}. Best is trial 8 with value: 26.459331263213898.
[I 2025-09-24 14:02:22,527] Trial 49 finished with value: 26.459707431022093 and parameters: {'n_estimators': 222, 'learning_rate': 0.015415698563212621, 'max_depth': 4, 'num_leaves': 31, 'min_child_samples': 86, 'subsample': 0.7453185351141247, 'colsample_bytree': 0.8235212045461247, 'reg_alpha': 0.5373174449907621, 'reg_lambda': 0.8879176135589146, 'min_split_gain': 0.7373326688234401}. Best is trial 8 with value: 26.459331263213898.

Best LightGBM CV RMSE: 26.4593
Best LightGBM params: {'n_estimators': 433, 'learning_rate': 0.06609683141448022, 'max_depth': 4, 'num_leaves': 37, 'min_child_samples': 45, 'subsample': 0.7975549966080241, 'colsample_bytree': 0.9188818535014192, 'reg_alpha': 1.2751149427104262, 'reg_lambda': 1.774425485152653, 'min_split_gain': 0.4722149251619493}

def objective_catboost(trial):
    """Optuna objective: mean 5-fold CV RMSE of a CatBoost regressor.

    Draws one hyperparameter configuration from ``trial``, trains a
    ``cb.CatBoostRegressor`` on each training split of the module-level
    ``X`` / ``y`` (indexed positionally with ``.iloc``), and evaluates it on
    the matching held-out fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the five per-fold RMSE values (lower is better).
    """
    # Hyperparameters explored by the study (sampled in this order).
    sampled = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'depth': trial.suggest_int('depth', 3, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
    }
    # Settings held constant across all trials.
    fixed = {
        'random_seed': 42,
        'eval_metric': 'RMSE',
        'early_stopping_rounds': 100,
        'verbose': False,
        'task_type': 'GPU',  # train on GPU
        'devices': '0:1',    # GPU device id string handed to CatBoost
    }

    def _fold_rmse(fit_rows, holdout_rows):
        # Train on one split and return the RMSE on its held-out rows.
        X_fit, y_fit = X.iloc[fit_rows], y.iloc[fit_rows]
        X_holdout, y_holdout = X.iloc[holdout_rows], y.iloc[holdout_rows]

        regressor = cb.CatBoostRegressor(**sampled, **fixed)
        # The held-out fold doubles as the eval set that drives early stopping.
        regressor.fit(X_fit, y_fit, eval_set=(X_holdout, y_holdout))

        holdout_pred = regressor.predict(X_holdout)
        return root_mean_squared_error(y_holdout, holdout_pred)

    # Identical seeded, shuffled split on every trial.
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    fold_rmses = [
        _fold_rmse(fit_rows, holdout_rows)
        for fit_rows, holdout_rows in splitter.split(X, y)
    ]
    return np.mean(fold_rmses)

# Seeded 50-trial TPE search minimizing the value returned by objective_catboost.
print("➗ Tuning CatBoost parameters...")
study_catboost = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=42),
    direction='minimize',
)
study_catboost.optimize(func=objective_catboost, n_trials=50)

# Keep the winning configuration in a module-level name, then report it.
best_catboost_params = study_catboost.best_params
print(f"Best CatBoost params: {best_catboost_params}")
print(f"Best CatBoost CV RMSE: {study_catboost.best_value:.4f}")

[I 2025-09-24 14:02:22,546] A new study created in memory with name: no-name-63e88423-b43a-4df9-b693-d5ce9789c7ad

➗ Tuning CatBoost parameters...

[I 2025-09-24 14:02:29,794] Trial 0 finished with value: 26.46067841746834 and parameters: {'iterations': 437, 'learning_rate': 0.09556428757689246, 'depth': 8, 'l2_leaf_reg': 6.387926357773329, 'border_count': 66}. Best is trial 0 with value: 26.46067841746834.
[I 2025-09-24 14:02:46,855] Trial 1 finished with value: 26.459670724505884 and parameters: {'iterations': 240, 'learning_rate': 0.015227525095137952, 'depth': 9, 'l2_leaf_reg': 6.41003510568888, 'border_count': 190}. Best is trial 1 with value: 26.459670724505884.
[I 2025-09-24 14:02:54,650] Trial 2 finished with value: 26.46104649941759 and parameters: {'iterations': 118, 'learning_rate': 0.0972918866945795, 'depth': 9, 'l2_leaf_reg': 2.9110519961044856, 'border_count': 72}. Best is trial 1 with value: 26.459670724505884.
[I 2025-09-24 14:03:02,672] Trial 3 finished with value: 26.459707519560766 and parameters: {'iterations': 265, 'learning_rate': 0.0373818018663584, 'depth': 7, 'l2_leaf_reg': 4.887505167779041, 'border_count': 97}. Best is trial 1 with value: 26.459670724505884.
[I 2025-09-24 14:03:13,157] Trial 4 finished with value: 26.459072844426466 and parameters: {'iterations': 651, 'learning_rate': 0.022554447458683766, 'depth': 5, 'l2_leaf_reg': 4.297256589643226, 'border_count': 134}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:03:22,027] Trial 5 finished with value: 26.459478702379265 and parameters: {'iterations': 807, 'learning_rate': 0.02797064039425238, 'depth': 7, 'l2_leaf_reg': 6.331731119758382, 'border_count': 42}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:03:32,502] Trial 6 finished with value: 26.45971289521408 and parameters: {'iterations': 647, 'learning_rate': 0.02534717113185624, 'depth': 3, 'l2_leaf_reg': 9.539969835279999, 'border_count': 248}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:03:40,442] Trial 7 finished with value: 26.459792294205933 and parameters: {'iterations': 828, 'learning_rate': 0.037415239225603365, 'depth': 3, 'l2_leaf_reg': 7.158097238609412, 'border_count': 130}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:03:46,326] Trial 8 finished with value: 26.459840719426147 and parameters: {'iterations': 209, 'learning_rate': 0.054565921910014324, 'depth': 3, 'l2_leaf_reg': 9.18388361870904, 'border_count': 89}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:03:54,949] Trial 9 finished with value: 26.459311869527113 and parameters: {'iterations': 696, 'learning_rate': 0.038053996848046986, 'depth': 7, 'l2_leaf_reg': 5.920392514089517, 'border_count': 73}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:04:01,456] Trial 10 finished with value: 26.45944131127093 and parameters: {'iterations': 961, 'learning_rate': 0.07298875089294375, 'depth': 5, 'l2_leaf_reg': 1.1616568805333802, 'border_count': 175}. Best is trial 4 with value: 26.459072844426466.
[I 2025-09-24 14:04:09,056] Trial 11 finished with value: 26.45905045084765 and parameters: {'iterations': 560, 'learning_rate': 0.05420464647674178, 'depth': 5, 'l2_leaf_reg': 4.013156586009128, 'border_count': 131}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:16,341] Trial 12 finished with value: 26.459258052085328 and parameters: {'iterations': 542, 'learning_rate': 0.06056303874014642, 'depth': 5, 'l2_leaf_reg': 3.6301461895178777, 'border_count': 139}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:27,836] Trial 13 finished with value: 26.459366859268435 and parameters: {'iterations': 413, 'learning_rate': 0.01099954153915563, 'depth': 5, 'l2_leaf_reg': 4.073333099706512, 'border_count': 176}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:34,103] Trial 14 finished with value: 26.459360546782598 and parameters: {'iterations': 523, 'learning_rate': 0.07567872365264812, 'depth': 5, 'l2_leaf_reg': 1.8833121437563944, 'border_count': 114}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:41,005] Trial 15 finished with value: 26.45952811170384 and parameters: {'iterations': 703, 'learning_rate': 0.05331596436694203, 'depth': 4, 'l2_leaf_reg': 2.6877936829003044, 'border_count': 227}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:47,171] Trial 16 finished with value: 26.45945549100266 and parameters: {'iterations': 461, 'learning_rate': 0.06852732333724086, 'depth': 6, 'l2_leaf_reg': 4.693037542497081, 'border_count': 146}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:53,152] Trial 17 finished with value: 26.460009576708565 and parameters: {'iterations': 636, 'learning_rate': 0.08581774480222944, 'depth': 6, 'l2_leaf_reg': 8.272594310024479, 'border_count': 164}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:04:59,869] Trial 18 finished with value: 26.45937836253728 and parameters: {'iterations': 337, 'learning_rate': 0.05112078069613513, 'depth': 4, 'l2_leaf_reg': 3.5133049948191672, 'border_count': 198}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:12,002] Trial 19 finished with value: 26.460119003448256 and parameters: {'iterations': 795, 'learning_rate': 0.02261540171185865, 'depth': 10, 'l2_leaf_reg': 5.036095662544337, 'border_count': 118}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:18,439] Trial 20 finished with value: 26.45942778418759 and parameters: {'iterations': 985, 'learning_rate': 0.04682360310956884, 'depth': 4, 'l2_leaf_reg': 7.36747277024179, 'border_count': 153}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:26,332] Trial 21 finished with value: 26.459077716970533 and parameters: {'iterations': 567, 'learning_rate': 0.04414658510823381, 'depth': 5, 'l2_leaf_reg': 3.9518678932161597, 'border_count': 134}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:33,007] Trial 22 finished with value: 26.45965487927986 and parameters: {'iterations': 587, 'learning_rate': 0.044989227530774785, 'depth': 6, 'l2_leaf_reg': 3.9923129803321276, 'border_count': 106}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:39,085] Trial 23 finished with value: 26.45917244451249 and parameters: {'iterations': 500, 'learning_rate': 0.06223506119169805, 'depth': 4, 'l2_leaf_reg': 2.8368840744346446, 'border_count': 126}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:46,881] Trial 24 finished with value: 26.45915635464541 and parameters: {'iterations': 725, 'learning_rate': 0.04272034198710173, 'depth': 5, 'l2_leaf_reg': 5.576428804215714, 'border_count': 155}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:05:54,413] Trial 25 finished with value: 26.459372825256985 and parameters: {'iterations': 377, 'learning_rate': 0.0313395516678257, 'depth': 6, 'l2_leaf_reg': 4.398606906810977, 'border_count': 200}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:03,178] Trial 26 finished with value: 26.459522003864986 and parameters: {'iterations': 592, 'learning_rate': 0.020983970091044933, 'depth': 4, 'l2_leaf_reg': 3.2716722292301204, 'border_count': 128}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:09,104] Trial 27 finished with value: 26.459333292152138 and parameters: {'iterations': 878, 'learning_rate': 0.06417925269861637, 'depth': 5, 'l2_leaf_reg': 1.7626143350076542, 'border_count': 88}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:17,034] Trial 28 finished with value: 26.459285359435132 and parameters: {'iterations': 623, 'learning_rate': 0.033824639162804, 'depth': 6, 'l2_leaf_reg': 2.323357531687925, 'border_count': 142}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:22,505] Trial 29 finished with value: 26.460653547897273 and parameters: {'iterations': 468, 'learning_rate': 0.0815009733076648, 'depth': 8, 'l2_leaf_reg': 5.250535592500121, 'border_count': 32}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:31,305] Trial 30 finished with value: 26.459735109704248 and parameters: {'iterations': 759, 'learning_rate': 0.018596847328845718, 'depth': 8, 'l2_leaf_reg': 4.283751125136184, 'border_count': 59}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:38,185] Trial 31 finished with value: 26.459272516296416 and parameters: {'iterations': 714, 'learning_rate': 0.044348447043803245, 'depth': 5, 'l2_leaf_reg': 5.89022464046902, 'border_count': 163}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:45,556] Trial 32 finished with value: 26.459168476868236 and parameters: {'iterations': 563, 'learning_rate': 0.04181538347946288, 'depth': 5, 'l2_leaf_reg': 5.459485930488398, 'border_count': 158}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:52,218] Trial 33 finished with value: 26.45941404822587 and parameters: {'iterations': 659, 'learning_rate': 0.05791371460631222, 'depth': 4, 'l2_leaf_reg': 3.773732848189078, 'border_count': 180}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:06:58,716] Trial 34 finished with value: 26.459769571825696 and parameters: {'iterations': 905, 'learning_rate': 0.05084632532384495, 'depth': 6, 'l2_leaf_reg': 6.974380584379952, 'border_count': 101}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:08,199] Trial 35 finished with value: 26.459490669607753 and parameters: {'iterations': 738, 'learning_rate': 0.029163661069284412, 'depth': 7, 'l2_leaf_reg': 4.759413797876128, 'border_count': 139}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:20,164] Trial 36 finished with value: 26.459119085983936 and parameters: {'iterations': 668, 'learning_rate': 0.012845948981292314, 'depth': 5, 'l2_leaf_reg': 6.461755082761675, 'border_count': 111}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:29,849] Trial 37 finished with value: 26.460118877155203 and parameters: {'iterations': 498, 'learning_rate': 0.01082971830967868, 'depth': 3, 'l2_leaf_reg': 8.062722783690878, 'border_count': 115}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:40,805] Trial 38 finished with value: 26.459325041652896 and parameters: {'iterations': 671, 'learning_rate': 0.015029062353473586, 'depth': 7, 'l2_leaf_reg': 6.237513473940993, 'border_count': 91}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:45,110] Trial 39 finished with value: 26.460038623324827 and parameters: {'iterations': 100, 'learning_rate': 0.026059750375353016, 'depth': 4, 'l2_leaf_reg': 6.705684445886612, 'border_count': 107}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:07:55,909] Trial 40 finished with value: 26.45922747675387 and parameters: {'iterations': 603, 'learning_rate': 0.019040872587329095, 'depth': 5, 'l2_leaf_reg': 3.1814671803151087, 'border_count': 81}. Best is trial 11 with value: 26.45905045084765.
[I 2025-09-24 14:08:03,864] Trial 41 finished with value: 26.459022732434143 and parameters: {'iterations': 771, 'learning_rate': 0.039065982569865626, 'depth': 5, 'l2_leaf_reg': 5.676518482917524, 'border_count': 134}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:11,937] Trial 42 finished with value: 26.459601365816418 and parameters: {'iterations': 847, 'learning_rate': 0.035160062802251854, 'depth': 6, 'l2_leaf_reg': 4.585982311855646, 'border_count': 127}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:24,868] Trial 43 finished with value: 26.45912545315624 and parameters: {'iterations': 774, 'learning_rate': 0.015209110243488307, 'depth': 5, 'l2_leaf_reg': 7.608653982011489, 'border_count': 137}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:31,567] Trial 44 finished with value: 26.45942095735745 and parameters: {'iterations': 566, 'learning_rate': 0.04843303336927048, 'depth': 4, 'l2_leaf_reg': 5.852179562177783, 'border_count': 120}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:39,939] Trial 45 finished with value: 26.45929852018108 and parameters: {'iterations': 688, 'learning_rate': 0.040427629933072036, 'depth': 3, 'l2_leaf_reg': 5.243605042928747, 'border_count': 65}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:48,962] Trial 46 finished with value: 26.459083854060548 and parameters: {'iterations': 531, 'learning_rate': 0.025655752707235635, 'depth': 5, 'l2_leaf_reg': 4.016144373634581, 'border_count': 148}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:08:57,466] Trial 47 finished with value: 26.45942046399345 and parameters: {'iterations': 405, 'learning_rate': 0.02934835025311712, 'depth': 6, 'l2_leaf_reg': 4.022722983481144, 'border_count': 149}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:09:05,847] Trial 48 finished with value: 26.45949852374045 and parameters: {'iterations': 537, 'learning_rate': 0.03778824583750322, 'depth': 7, 'l2_leaf_reg': 2.362990261547621, 'border_count': 169}. Best is trial 41 with value: 26.459022732434143.
[I 2025-09-24 14:09:13,744] Trial 49 finished with value: 26.45928855631837 and parameters: {'iterations': 303, 'learning_rate': 0.03208580905958678, 'depth': 5, 'l2_leaf_reg': 3.3285324631803257, 'border_count': 134}. Best is trial 41 with value: 26.459022732434143.

Best CatBoost params: {'iterations': 771, 'learning_rate': 0.039065982569865626, 'depth': 5, 'l2_leaf_reg': 5.676518482917524, 'border_count': 134}
Best CatBoost CV RMSE: 26.4590

def generate_stack(
    base_models,
    X, y,
    X_test,
    meta_model=None,
    folds=5,
    seed=42,
    eval_metric=root_mean_squared_error
):
    """
    Train a stacking ensemble.

    Each base model is fitted once per fold; its out-of-fold predictions
    become one column of the level-one training matrix, and the average of
    its per-fold predictions on X_test becomes the matching test column.
    The meta-model is then fitted on the level-one training matrix.

    Parameters:
    - base_models: non-empty list of sklearn-like models (must implement fit & predict).
      The same model object is refitted on every fold, so after the call each
      entry holds whatever state its last fold's fit left behind.
    - X, y: training data. pandas objects are sliced with .iloc; anything else
      is converted with np.asarray and indexed positionally.
    - X_test: test data (for generating meta features); passed to predict as-is
    - meta_model: sklearn-like model for meta learning (default: LinearRegression)
    - folds: number of KFold splits
    - seed: random seed for reproducibility
    - eval_metric: callable(y_true, y_pred) used to score each fold and the
      final stacked predictions (default: RMSE)

    Returns:
    - meta_model: trained meta-model
    - meta_features_train: level-one train predictions
    - meta_features_test: level-one test predictions
    - final_score: score computed by eval_metric
    - train_preds: meta-model predictions on training data (for evaluation)
    - test_preds: meta-model predictions on test data (for submission)

    Raises:
    - ValueError: if base_models is empty.

    Note:
    - final_score is computed on the same level-one features the meta-model
      was fitted on, so it is in-sample for the meta-model even though the
      base-model columns are out-of-fold.
    """

    n_models = len(base_models)
    if n_models == 0:
        raise ValueError("base_models must contain at least one model")

    n_train, n_test = len(X), len(X_test)

    # Accept plain arrays / lists as well as pandas objects.
    if not hasattr(X, "iloc"):
        X = np.asarray(X)
    if not hasattr(y, "iloc"):
        y = np.asarray(y)

    def take_rows(data, idx):
        # pandas needs positional .iloc; ndarrays are indexed directly.
        return data.iloc[idx] if hasattr(data, "iloc") else data[idx]

    # Not every callable carries __name__ (e.g. functools.partial objects).
    metric_name = getattr(eval_metric, "__name__", type(eval_metric).__name__)

    meta_features_train = np.zeros((n_train, n_models))
    meta_features_test = np.zeros((n_test, n_models))

    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    # Materialise the folds once so every base model is trained and scored on
    # exactly the same partitions, whatever kind of seed was supplied.
    splits = list(kf.split(X))

    print(f"Starting stacking with {n_models} base models and {folds}-fold CV...")

    for i, model in enumerate(base_models):
        print(f"\nTraining model {i+1}/{n_models}: {model.__class__.__name__}")

        oof = np.zeros(n_train)
        preds = np.zeros(n_test)

        for fold, (train_idx, val_idx) in enumerate(splits):
            X_train, X_val = take_rows(X, train_idx), take_rows(X, val_idx)
            y_train, y_val = take_rows(y, train_idx), take_rows(y, val_idx)

            model.fit(X_train, y_train)
            # ravel() lets models that return an (n, 1) column vector be stacked too.
            oof[val_idx] = np.asarray(model.predict(X_val)).ravel()
            preds += np.asarray(model.predict(X_test)).ravel() / folds

            fold_score = eval_metric(y_val, oof[val_idx])
            print(f"  Fold {fold+1} {metric_name}: {fold_score:.4f}")

        meta_features_train[:, i] = oof
        meta_features_test[:, i] = preds

    # Use default meta model if none provided
    if meta_model is None:
        meta_model = LinearRegression()

    print(f"\nFitting meta-model: {meta_model.__class__.__name__}")
    meta_model.fit(meta_features_train, y)

    # Generate predictions for both train (evaluation) and test (submission)
    train_preds = meta_model.predict(meta_features_train)
    test_preds = meta_model.predict(meta_features_test)

    final_score = eval_metric(y, train_preds)
    print(f"\nFinal cross-validation {metric_name}: {final_score:.4f}")

    return meta_model, meta_features_train, meta_features_test, final_score, train_preds, test_preds

# Usage
from sklearn.linear_model import Ridge

# Level-0 learners, each built from the best_*_params dict defined earlier
# in the file plus a fixed random_state.
base_models = [
    xg.XGBRegressor(random_state=42, n_jobs=-1, **best_xgboost_params),
    lgb.LGBMRegressor(random_state=42, n_jobs=-1, **best_lightgbm_params),
    cb.CatBoostRegressor(random_state=42, verbose=0, **best_catboost_params),
]

# Run the 5-fold stack with a Ridge meta-learner and unpack all six results.
(
    meta_model,
    meta_features_train,
    meta_features_test,
    final_score,
    train_preds,
    test_preds,
) = generate_stack(
    base_models=base_models,
    X=X,
    y=y,
    X_test=X_test,
    meta_model=Ridge(),
    folds=5,
    seed=42,
    eval_metric=root_mean_squared_error,
)

# The submission uses the meta-model's test predictions, clipped to [60, 200].
test_predictions = np.clip(test_preds, 60, 200)

# Assemble the two-column submission frame: ids first, then the predictions.
submission = pd.DataFrame({'id': test['id']})
submission['BeatsPerMinute'] = test_predictions

print(f"\n✅ Submission created with {len(submission)} predictions")
print(f"Prediction range: {test_predictions.min():.2f} - {test_predictions.max():.2f}")

# Write the submission file without the index column.
submission.to_csv("alpha_romeo.csv", index=False)

Starting stacking with 3 base models and 5-fold CV...

Training model 1/3: XGBRegressor
  Fold 1 root_mean_squared_error: 26.4433
  Fold 2 root_mean_squared_error: 26.4885
  Fold 3 root_mean_squared_error: 26.5260
  Fold 4 root_mean_squared_error: 26.4453
  Fold 5 root_mean_squared_error: 26.4104

Training model 2/3: LGBMRegressor
  Fold 1 root_mean_squared_error: 26.4520
  Fold 2 root_mean_squared_error: 26.4970
  Fold 3 root_mean_squared_error: 26.5345
  Fold 4 root_mean_squared_error: 26.4563
  Fold 5 root_mean_squared_error: 26.4220

Training model 3/3: CatBoostRegressor
  Fold 1 root_mean_squared_error: 26.4429
  Fold 2 root_mean_squared_error: 26.4872
  Fold 3 root_mean_squared_error: 26.5277
  Fold 4 root_mean_squared_error: 26.4472
  Fold 5 root_mean_squared_error: 26.4123

Fitting meta-model: Ridge

Final cross-validation root_mean_squared_error: 26.4609

✅ Submission created with 174722 predictions
Prediction range: 113.93 - 125.09

# Preview the first rows of the submission frame (notebook cell; its output follows).
submission.head()

	id	BeatsPerMinute
0	524164	119.335383
1	524165	118.342558
2	524166	120.133692
3	524167	118.938178
4	524168	119.911148