# !pip install xgboost lightgbm catboost shap scikit-learn pandas matplotlib numpy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import shap
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.datasets import fetch_openml
import time

# Seed NumPy's global RNG so NumPy-driven randomness repeats across runs.
np.random.seed(42)

# Fetch the OpenML 'credit-g' dataset (version 1) as pandas objects.
data = fetch_openml(name='credit-g', version=1, as_frame=True)
X = data['data']
# Encode the label as 1 where the target equals 'good' and 0 otherwise.
y = (data['target'] == 'good').astype(int)

# Hold out 25% of the rows for testing; the fixed random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
print('train/test:', X_train.shape, X_test.shape)

# YOUR TURN
# Train XGBoost, LightGBM, CatBoost with default hyperparameters.
# Time each fit. Report test AUC.

# YOUR TURN
# For the best baseline, run a grid search over n_estimators, learning_rate, and max_depth.
# Report the tuned test AUC and its gain over the untuned baseline.

# YOUR TURN
# Compute SHAP values for the tuned model on the test set.
# Plot the global feature-importance summary. Identify one feature whose effect
# is monotonic and one whose effect is non-monotonic.

Lab 3 — Gradient boosting in production

Setup

A real, publicly available tabular dataset (substitute the Cameroon bank-loan dataset when available)

Exercise 1 — Train all three boosters

Exercise 2 — Tune the best one

Exercise 3 — SHAP interpretation

Done?