Machine Learning Expert

Expert guidance for machine learning systems, deep learning, model training, deployment, and MLOps practices.

Core Concepts

Machine Learning Fundamentals

  • Supervised learning (classification, regression)
  • Unsupervised learning (clustering, dimensionality reduction)
  • Reinforcement learning
  • Feature engineering
  • Model evaluation and validation
  • Hyperparameter tuning (see the sketch after this list)
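
For hyperparameter tuning and validation, scikit-learn's GridSearchCV cross-validates every combination in a parameter grid. A minimal sketch on synthetic data (the grid values are illustrative, not recommendations):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Toy data stands in for a real dataset
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

# Search a small grid with 5-fold cross-validation
param_grid = {"n_estimators": [100, 200], "max_depth": [5, 10, None]}
search = GridSearchCV(
    RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1
)
search.fit(X, y)

print(search.best_params_, search.best_score_)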

Deep Learning

  • Neural networks (CNNs, RNNs, Transformers)
  • Transfer learning (a fine-tuning sketch follows this list)
  • Fine-tuning pre-trained models
  • Attention mechanisms
  • GANs (Generative Adversarial Networks)
  • Autoencoders
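
Transfer learning typically means freezing a pretrained backbone and training only a new head. A minimal sketch using torchvision (assumes torchvision >= 0.13 for the weights API; the 5-class head is a hypothetical task):

import torch.nn as nn
from torchvision import models

# Load an ImageNet-pretrained backbone
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze every pretrained parameter
for param in backbone.parameters():
    param.requires_grad = False

# Swap in a new classification head; only backbone.fc.parameters() will train
backbone.fc = nn.Linear(backbone.fc.in_features, 5)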

MLOps

  • Model versioning and tracking
  • Experiment management
  • Model deployment and serving
  • Monitoring and retraining
  • CI/CD for ML pipelines
  • A/B testing for models (a routing sketch follows this list)
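
One common approach to model A/B testing is deterministic hash-based routing, so a given user always hits the same variant. A minimal sketch (assign_variant and the 10% treatment share are illustrative choices, not part of any library):

import hashlib

def assign_variant(user_id: str, treatment_share: float = 0.1) -> str:
    """Deterministically bucket a user so they always see the same model."""
    bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % 100
    return "candidate" if bucket < treatment_share * 100 else "baseline"

# Route ~10% of traffic to the candidate model
variant = assign_variant("user-123")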

Supervised Learning

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

class MLPipeline:
    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None
        self.feature_names = None

    def prepare_data(self, X: pd.DataFrame, y: pd.Series, test_size: float = 0.2):
        """Split and scale data"""
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )

        # Scale features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.feature_names = X.columns.tolist()

        return X_train_scaled, X_test_scaled, y_train, y_test

    def train_classifier(self, X_train, y_train, n_estimators: int = 100):
        """Train random forest classifier"""
        self.model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=10,
            random_state=42,
            n_jobs=-1
        )

        self.model.fit(X_train, y_train)

        # Cross-validation
        cv_scores = cross_val_score(self.model, X_train, y_train, cv=5)

        return {
            "cv_mean": cv_scores.mean(),
            "cv_std": cv_scores.std(),
            "feature_importance": dict(zip(
                self.feature_names,
                self.model.feature_importances_
            ))
        }

    def evaluate(self, X_test, y_test) -> dict:
        """Evaluate model performance"""
        y_pred = self.model.predict(X_test)
        y_proba = self.model.predict_proba(X_test)

        return {
            "predictions": y_pred,
            "probabilities": y_proba,
            "confusion_matrix": confusion_matrix(y_test, y_pred).tolist(),
            "classification_report": classification_report(y_test, y_pred, output_dict=True)
        }

    def save_model(self, path: str):
        """Save model and scaler"""
        joblib.dump({
            "model": self.model,
            "scaler": self.scaler,
            "feature_names": self.feature_names
        }, path)
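
A usage sketch for the pipeline above, assuming a hypothetical DataFrame df whose target column holds the class labels:

# Hypothetical: df is a pd.DataFrame with a 'target' label column
pipeline = MLPipeline()
X_train, X_test, y_train, y_test = pipeline.prepare_data(
    df.drop(columns=["target"]), df["target"]
)
print(pipeline.train_classifier(X_train, y_train)["cv_mean"])
print(pipeline.evaluate(X_test, y_test)["classification_report"])
pipeline.save_model("model.pkl")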

Deep Learning with PyTorch

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class NeuralNetwork(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

class Trainer:
    def __init__(self, model, device='cuda' if torch.cuda.is_available() else 'cpu'):
        self.model = model.to(device)
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(model.parameters(), lr=0.001)

    def train_epoch(self, dataloader: DataLoader) -> float:
        """Train for one epoch"""
        self.model.train()
        total_loss = 0

        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)

            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(dataloader)

    def evaluate(self, dataloader: DataLoader) -> dict:
        """Evaluate model"""
        self.model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for data, target in dataloader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                _, predicted = torch.max(output, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        return {
            "accuracy": 100 * correct / total,
            "total_samples": total
        }

    def train(self, train_loader: DataLoader, val_loader: DataLoader,
              epochs: int = 10):
        """Full training loop"""
        history = {"train_loss": [], "val_acc": []}

        for epoch in range(epochs):
            train_loss = self.train_epoch(train_loader)
            val_metrics = self.evaluate(val_loader)

            history["train_loss"].append(train_loss)
            history["val_acc"].append(val_metrics["accuracy"])

            print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f} - Val Acc: {val_metrics['accuracy']:.2f}%")

        return history
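
A usage sketch with synthetic tensors, reusing NeuralNetwork and Trainer from above (the sizes, 20 features and 3 classes, are arbitrary):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Synthetic data: 1,000 samples, 20 features, 3 classes
X = torch.randn(1000, 20)
y = torch.randint(0, 3, (1000,))
train_ds = TensorDataset(X[:800], y[:800])
val_ds = TensorDataset(X[800:], y[800:])

model = NeuralNetwork(input_size=20, hidden_size=64, num_classes=3)
trainer = Trainer(model)
history = trainer.train(
    DataLoader(train_ds, batch_size=32, shuffle=True),
    DataLoader(val_ds, batch_size=32),
    epochs=5,
)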

Model Deployment

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np

app = FastAPI()

class PredictionRequest(BaseModel):
    features: list[float]

class PredictionResponse(BaseModel):
    prediction: int
    probability: float
    model_version: str

class ModelServer:
    def __init__(self, model_path: str):
        self.model_data = joblib.load(model_path)
        self.model = self.model_data["model"]
        self.scaler = self.model_data["scaler"]
        self.version = "1.0.0"

    def predict(self, features: np.ndarray) -> dict:
        """Make prediction"""
        # Scale features
        features_scaled = self.scaler.transform(features.reshape(1, -1))

        # Predict
        prediction = self.model.predict(features_scaled)[0]
        probability = self.model.predict_proba(features_scaled)[0].max()

        return {
            "prediction": int(prediction),
            "probability": float(probability),
            "model_version": self.version
        }

# Global model instance
model_server = ModelServer("model.pkl")

@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    try:
        features = np.array(request.features)
        result = model_server.predict(features)
        return PredictionResponse(**result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy", "model_version": model_server.version}

MLOps with MLflow

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

class MLflowExperiment:
    def __init__(self, experiment_name: str):
        mlflow.set_experiment(experiment_name)
        self.client = MlflowClient()

    def log_training_run(self, model, X_train, y_train, X_test, y_test,
                        params: dict):
        """Log training run with MLflow"""
        with mlflow.start_run():
            # Log parameters
            mlflow.log_params(params)

            # Train model
            model.fit(X_train, y_train)

            # Evaluate
            train_score = model.score(X_train, y_train)
            test_score = model.score(X_test, y_test)

            # Log metrics
            mlflow.log_metric("train_accuracy", train_score)
            mlflow.log_metric("test_accuracy", test_score)

            # Log model
            mlflow.sklearn.log_model(model, "model")

            # Log feature importance
            if hasattr(model, 'feature_importances_'):
                feature_importance = dict(enumerate(model.feature_importances_))
                mlflow.log_dict(feature_importance, "feature_importance.json")

            run_id = mlflow.active_run().info.run_id
            return run_id

    def register_model(self, run_id: str, model_name: str):
        """Register model in MLflow model registry"""
        model_uri = f"runs:/{run_id}/model"
        mlflow.register_model(model_uri, model_name)

    def promote_to_production(self, model_name: str, version: int):
        """Promote model version to production"""
        self.client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage="Production"
        )
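
A usage sketch chaining the steps above; the experiment and model names are hypothetical, and X_train/X_test/y_train/y_test are assumed to exist from an earlier split:

from sklearn.ensemble import RandomForestClassifier

experiment = MLflowExperiment("churn-classifier")  # hypothetical name
params = {"n_estimators": 200, "max_depth": 10}
run_id = experiment.log_training_run(
    RandomForestClassifier(**params), X_train, y_train, X_test, y_test, params
)
experiment.register_model(run_id, "churn-classifier")
experiment.promote_to_production("churn-classifier", version=1)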

Best Practices

Data Preparation

  • Handle missing values appropriately
  • Scale/normalize features
  • Encode categorical variables properly (see the pipeline sketch after this list)
  • Split data before any preprocessing
  • Use stratified splits for imbalanced data
  • Create validation set for hyperparameter tuning
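
A leak-free way to combine imputation, scaling, and encoding is a ColumnTransformer fit on the training split only. A sketch with hypothetical column names:

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Hypothetical column groups for an arbitrary tabular dataset
numeric_cols = ["age", "income"]
categorical_cols = ["country", "plan"]

preprocessor = ColumnTransformer([
    ("num", Pipeline([
        ("impute", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
    ]), numeric_cols),
    ("cat", Pipeline([
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore")),
    ]), categorical_cols),
])
# Fit on the training split only, then transform both splits (avoids leakage)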

Model Training

  • Start with simple baselines
  • Use cross-validation
  • Monitor training and validation metrics
  • Implement early stopping (sketched after this list)
  • Save best model checkpoints
  • Track experiments systematically
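
Early stopping can be a small helper that tracks the best validation loss. A minimal sketch (the patience and min_delta defaults are illustrative):

class EarlyStopping:
    """Stop training when a monitored metric stops improving."""
    def __init__(self, patience: int = 3, min_delta: float = 0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float("inf")
        self.wait = 0

    def step(self, val_loss: float) -> bool:
        """Return True when training should stop."""
        if val_loss < self.best - self.min_delta:
            self.best = val_loss
            self.wait = 0  # improvement: also a good point to checkpoint
            return False
        self.wait += 1
        return self.wait >= self.patience

Call stopper.step(val_loss) once per epoch and break out of the training loop when it returns True, saving the best checkpoint whenever the loss improves.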

Deployment

  • Version models and datasets
  • Monitor model performance in production
  • Implement model A/B testing
  • Set up retraining pipelines
  • Log predictions for analysis
  • Implement fallback mechanisms (see the sketch below)
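
A fallback mechanism can be as simple as wrapping the primary model and degrading to a baseline on failure. A sketch (both model objects are hypothetical and only need a predict method):

def predict_with_fallback(features, primary_model, fallback_model):
    """Serve from the primary model; degrade to a simpler baseline on failure."""
    try:
        return primary_model.predict(features)
    except Exception:
        # In practice, log the exception and inputs so failures can be analyzed
        return fallback_model.predict(features)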

Anti-Patterns

  ❌ Training on test data (data leakage)
  ❌ No validation set for hyperparameter tuning
  ❌ Ignoring class imbalance
  ❌ Not scaling features
  ❌ Overfitting to training data
  ❌ No model versioning
  ❌ Missing monitoring in production
