Machine Learning Expert

Expert guidance for machine learning systems, deep learning, model training, deployment, and MLOps practices.

Core Concepts

Machine Learning Fundamentals

  • Supervised learning (classification, regression)
  • Unsupervised learning (clustering, dimensionality reduction)
  • Reinforcement learning
  • Feature engineering
  • Model evaluation and validation
  • Hyperparameter tuning (see the sketch after this list)
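
For hyperparameter tuning and validation, scikit-learn's GridSearchCV cross-validates every combination in a parameter grid. A minimal sketch on synthetic data (the grid values are illustrative, not recommendations):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Toy data stands in for a real dataset
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

# Search a small grid with 5-fold cross-validation
param_grid = {"n_estimators": [100, 200], "max_depth": [5, 10, None]}
search = GridSearchCV(
    RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1
)
search.fit(X, y)

print(search.best_params_, search.best_score_)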

Deep Learning

  • Neural networks (CNNs, RNNs, Transformers)
  • Transfer learning (a fine-tuning sketch follows this list)
  • Fine-tuning pre-trained models
  • Attention mechanisms
  • GANs (Generative Adversarial Networks)
  • Autoencoders
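
Transfer learning typically means freezing a pretrained backbone and training only a new head. A minimal sketch using torchvision (assumes torchvision >= 0.13 for the weights API; the 5-class head is a hypothetical task):

import torch.nn as nn
from torchvision import models

# Load an ImageNet-pretrained backbone
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze every pretrained parameter
for param in backbone.parameters():
    param.requires_grad = False

# Swap in a new classification head; only backbone.fc.parameters() will train
backbone.fc = nn.Linear(backbone.fc.in_features, 5)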

MLOps

  • Model versioning and tracking
  • Experiment management
  • Model deployment and serving
  • Monitoring and retraining
  • CI/CD for ML pipelines
  • A/B testing for models (a routing sketch follows this list)
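
One common approach to model A/B testing is deterministic hash-based routing, so a given user always hits the same variant. A minimal sketch (assign_variant and the 10% treatment share are illustrative choices, not part of any library):

import hashlib

def assign_variant(user_id: str, treatment_share: float = 0.1) -> str:
    """Deterministically bucket a user so they always see the same model."""
    bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % 100
    return "candidate" if bucket < treatment_share * 100 else "baseline"

# Route ~10% of traffic to the candidate model
variant = assign_variant("user-123")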

Supervised Learning

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

class MLPipeline:
    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None
        self.feature_names = None

    def prepare_data(self, X: pd.DataFrame, y: pd.Series, test_size: float = 0.2):
        """Split and scale data"""
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )

        # Scale features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.feature_names = X.columns.tolist()

        return X_train_scaled, X_test_scaled, y_train, y_test

    def train_classifier(self, X_train, y_train, n_estimators: int = 100):
        """Train random forest classifier"""
        self.model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=10,
            random_state=42,
            n_jobs=-1
        )

        self.model.fit(X_train, y_train)

        # Cross-validation
        cv_scores = cross_val_score(self.model, X_train, y_train, cv=5)

        return {
            "cv_mean": cv_scores.mean(),
            "cv_std": cv_scores.std(),
            "feature_importance": dict(zip(
                self.feature_names,
                self.model.feature_importances_
            ))
        }

    def evaluate(self, X_test, y_test) -> dict:
        """Evaluate model performance"""
        y_pred = self.model.predict(X_test)
        y_proba = self.model.predict_proba(X_test)

        return {
            "predictions": y_pred,
            "probabilities": y_proba,
            "confusion_matrix": confusion_matrix(y_test, y_pred).tolist(),
            "classification_report": classification_report(y_test, y_pred, output_dict=True)
        }

    def save_model(self, path: str):
        """Save model and scaler"""
        joblib.dump({
            "model": self.model,
            "scaler": self.scaler,
            "feature_names": self.feature_names
        }, path)
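
A usage sketch for the pipeline above, assuming a hypothetical DataFrame df whose target column holds the class labels:

# Hypothetical: df is a pd.DataFrame with a 'target' label column
pipeline = MLPipeline()
X_train, X_test, y_train, y_test = pipeline.prepare_data(
    df.drop(columns=["target"]), df["target"]
)
print(pipeline.train_classifier(X_train, y_train)["cv_mean"])
print(pipeline.evaluate(X_test, y_test)["classification_report"])
pipeline.save_model("model.pkl")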

Deep Learning with PyTorch

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class NeuralNetwork(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

class Trainer:
    def __init__(self, model, device='cuda' if torch.cuda.is_available() else 'cpu'):
        self.model = model.to(device)
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(model.parameters(), lr=0.001)

    def train_epoch(self, dataloader: DataLoader) -> float:
        """Train for one epoch"""
        self.model.train()
        total_loss = 0

        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)

            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(dataloader)

    def evaluate(self, dataloader: DataLoader) -> dict:
        """Evaluate model"""
        self.model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for data, target in dataloader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                _, predicted = torch.max(output, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        return {
            "accuracy": 100 * correct / total,
            "total_samples": total
        }

    def train(self, train_loader: DataLoader, val_loader: DataLoader,
              epochs: int = 10):
        """Full training loop"""
        history = {"train_loss": [], "val_acc": []}

        for epoch in range(epochs):
            train_loss = self.train_epoch(train_loader)
            val_metrics = self.evaluate(val_loader)

            history["train_loss"].append(train_loss)
            history["val_acc"].append(val_metrics["accuracy"])

            print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f} - Val Acc: {val_metrics['accuracy']:.2f}%")

        return history
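
A usage sketch with synthetic tensors, reusing NeuralNetwork and Trainer from above (the sizes, 20 features and 3 classes, are arbitrary):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Synthetic data: 1,000 samples, 20 features, 3 classes
X = torch.randn(1000, 20)
y = torch.randint(0, 3, (1000,))
train_ds = TensorDataset(X[:800], y[:800])
val_ds = TensorDataset(X[800:], y[800:])

model = NeuralNetwork(input_size=20, hidden_size=64, num_classes=3)
trainer = Trainer(model)
history = trainer.train(
    DataLoader(train_ds, batch_size=32, shuffle=True),
    DataLoader(val_ds, batch_size=32),
    epochs=5,
)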

Model Deployment

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np

app = FastAPI()

class PredictionRequest(BaseModel):
    features: list[float]

class PredictionResponse(BaseModel):
    prediction: int
    probability: float
    model_version: str

class ModelServer:
    def __init__(self, model_path: str):
        self.model_data = joblib.load(model_path)
        self.model = self.model_data["model"]
        self.scaler = self.model_data["scaler"]
        self.version = "1.0.0"

    def predict(self, features: np.ndarray) -> dict:
        """Make prediction"""
        # Scale features
        features_scaled = self.scaler.transform(features.reshape(1, -1))

        # Predict
        prediction = self.model.predict(features_scaled)[0]
        probability = self.model.predict_proba(features_scaled)[0].max()

        return {
            "prediction": int(prediction),
            "probability": float(probability),
            "model_version": self.version
        }

# Global model instance
model_server = ModelServer("model.pkl")

@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    try:
        features = np.array(request.features)
        result = model_server.predict(features)
        return PredictionResponse(**result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy", "model_version": model_server.version}

MLOps with MLflow

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

class MLflowExperiment:
    def __init__(self, experiment_name: str):
        mlflow.set_experiment(experiment_name)
        self.client = MlflowClient()

    def log_training_run(self, model, X_train, y_train, X_test, y_test,
                        params: dict):
        """Log training run with MLflow"""
        with mlflow.start_run():
            # Log parameters
            mlflow.log_params(params)

            # Train model
            model.fit(X_train, y_train)

            # Evaluate
            train_score = model.score(X_train, y_train)
            test_score = model.score(X_test, y_test)

            # Log metrics
            mlflow.log_metric("train_accuracy", train_score)
            mlflow.log_metric("test_accuracy", test_score)

            # Log model
            mlflow.sklearn.log_model(model, "model")

            # Log feature importance
            if hasattr(model, 'feature_importances_'):
                feature_importance = dict(enumerate(model.feature_importances_))
                mlflow.log_dict(feature_importance, "feature_importance.json")

            run_id = mlflow.active_run().info.run_id
            return run_id

    def register_model(self, run_id: str, model_name: str):
        """Register model in MLflow model registry"""
        model_uri = f"runs:/{run_id}/model"
        mlflow.register_model(model_uri, model_name)

    def promote_to_production(self, model_name: str, version: int):
        """Promote model version to production"""
        self.client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage="Production"
        )
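
A usage sketch chaining the steps above; the experiment and model names are hypothetical, and X_train/X_test/y_train/y_test are assumed to exist from an earlier split:

from sklearn.ensemble import RandomForestClassifier

experiment = MLflowExperiment("churn-classifier")  # hypothetical name
params = {"n_estimators": 200, "max_depth": 10}
run_id = experiment.log_training_run(
    RandomForestClassifier(**params), X_train, y_train, X_test, y_test, params
)
experiment.register_model(run_id, "churn-classifier")
experiment.promote_to_production("churn-classifier", version=1)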

Best Practices

Data Preparation

  • Handle missing values appropriately
  • Scale/normalize features
  • Encode categorical variables properly (see the pipeline sketch after this list)
  • Split data before any preprocessing
  • Use stratified splits for imbalanced data
  • Create validation set for hyperparameter tuning
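
A leak-free way to combine imputation, scaling, and encoding is a ColumnTransformer fit on the training split only. A sketch with hypothetical column names:

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Hypothetical column groups for an arbitrary tabular dataset
numeric_cols = ["age", "income"]
categorical_cols = ["country", "plan"]

preprocessor = ColumnTransformer([
    ("num", Pipeline([
        ("impute", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
    ]), numeric_cols),
    ("cat", Pipeline([
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore")),
    ]), categorical_cols),
])
# Fit on the training split only, then transform both splits (avoids leakage)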

Model Training

  • Start with simple baselines
  • Use cross-validation
  • Monitor training and validation metrics
  • Implement early stopping (sketched after this list)
  • Save best model checkpoints
  • Track experiments systematically
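
Early stopping can be a small helper that tracks the best validation loss. A minimal sketch (the patience and min_delta defaults are illustrative):

class EarlyStopping:
    """Stop training when a monitored metric stops improving."""
    def __init__(self, patience: int = 3, min_delta: float = 0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float("inf")
        self.wait = 0

    def step(self, val_loss: float) -> bool:
        """Return True when training should stop."""
        if val_loss < self.best - self.min_delta:
            self.best = val_loss
            self.wait = 0  # improvement: also a good point to checkpoint
            return False
        self.wait += 1
        return self.wait >= self.patience

Call stopper.step(val_loss) once per epoch and break out of the training loop when it returns True, saving the best checkpoint whenever the loss improves.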

Deployment

  • Version models and datasets
  • Monitor model performance in production
  • Implement model A/B testing
  • Set up retraining pipelines
  • Log predictions for analysis
  • Implement fallback mechanisms (see the sketch below)
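
A fallback mechanism can be as simple as wrapping the primary model and degrading to a baseline on failure. A sketch (both model objects are hypothetical and only need a predict method):

def predict_with_fallback(features, primary_model, fallback_model):
    """Serve from the primary model; degrade to a simpler baseline on failure."""
    try:
        return primary_model.predict(features)
    except Exception:
        # In practice, log the exception and inputs so failures can be analyzed
        return fallback_model.predict(features)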

Anti-Patterns

  ❌ Training on test data (data leakage)
  ❌ No validation set for hyperparameter tuning
  ❌ Ignoring class imbalance
  ❌ Not scaling features
  ❌ Overfitting to training data
  ❌ No model versioning
  ❌ Missing monitoring in production
