# !pip install dvc mlflow scikit-learn

import subprocess, pathlib
import mlflow
import mlflow.sklearn

# Default the MLflow tracking URI to a local SQLite file for the lab.
# A MLFLOW_TRACKING_URI that is already exported (e.g. a production
# tracking server) is left untouched; either way the effective value is echoed.
import os

if 'MLFLOW_TRACKING_URI' not in os.environ:
    os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'

print('MLflow URI:', os.environ['MLFLOW_TRACKING_URI'])

# !pip install dvc dvc-s3
# !dvc init
# !dvc add data/training.csv
# !git add data/training.csv.dvc data/.gitignore
#
# YOUR TURN — point a DVC remote at the storage of your choice (S3, GCS, Azure, local).

# YOUR TURN
# Wrap your training script in mlflow.start_run(). Log params, metrics, and
# the trained model with mlflow.sklearn.log_model.

# YOUR TURN
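# Register the model. Transition stage from None -> Staging -> Production.
# (Note: model stages are deprecated in recent MLflow releases; if your version
# warns about this, use registered-model aliases instead.)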
# Tag a v1.0 release in Git.

# Lab 2 — Versioning the full ML project

## Setup

## Pick a model from a prior lab and version it end-to-end

## Exercise 1 — Version the data with DVC

## Exercise 2 — Log model training with MLflow

## Exercise 3 — Promote to Model Registry

## Done?