pydantic-models

SKILL.md

Pydantic Models

Build type-safe, validated data structures using Pydantic for configuration, API payloads, and domain models.

When to Use This Skill

  • Creating validated data models
  • Building configuration/settings systems
  • Defining API request/response schemas
  • Parsing external data (JSON, YAML, env vars)
  • Implementing domain entities with validation

Pydantic v2 Fundamentals

Basic Model Structure

from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel, Field


class User(BaseModel):
    """User domain model with validation."""

    id: int
    # `...` marks the field as required; the remaining arguments are constraints.
    username: str = Field(..., min_length=3, max_length=50)
    email: str = Field(..., pattern=r"^[\w\.-]+@[\w\.-]+\.\w+$")
    is_active: bool = True
    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # value; build a timezone-aware UTC timestamp per instance instead.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    bio: Optional[str] = Field(None, max_length=500)

Model Configuration

from pydantic import BaseModel, ConfigDict

class StrictModel(BaseModel):
    """Model with strict configuration."""

    model_config = ConfigDict(
        # Validation behavior
        strict=True,              # No type coercion
        validate_assignment=True, # Validate on attribute set
        validate_default=True,    # Validate default values

        # Aliases and enums (these affect how the model is populated)
        populate_by_name=True,    # Accept the field name as well as its alias on input
        use_enum_values=True,     # Store the enum member's .value on the model

        # Schema
        extra="forbid",           # Reject unknown fields
        frozen=True,              # Immutable model

        # JSON
        ser_json_bytes="base64",  # How to serialize bytes
    )

    name: str

Field Definitions

Field with Constraints

from pydantic import BaseModel, Field
from typing import Annotated
from decimal import Decimal

class Product(BaseModel):
    # Required text field (`...` = no default), 1-100 characters.
    name: str = Field(..., description="Product name", examples=["Widget", "Gadget"], min_length=1, max_length=100)

    # 0 < price <= 10000, with at most two digits after the decimal point.
    price: Decimal = Field(..., decimal_places=2, le=10000, gt=0)

    # Optional count that defaults to zero and may never be negative.
    quantity: int = Field(ge=0, default=0)

    # Two capital letters, a dash, then four digits.
    sku: str = Field(..., description="SKU format: XX-0000", pattern=r"^[A-Z]{2}-\d{4}$")

Annotated Types (Preferred in v2)

from pydantic import BaseModel
from typing import Annotated
from pydantic import Field, StringConstraints

# Reusable type annotations: each alias bundles a base type with its
# constraints so several models can share the same rule.
Username = Annotated[str, StringConstraints(min_length=3, max_length=50, pattern=r"^[a-z_]+$")]  # lowercase letters and underscores only
PositiveInt = Annotated[int, Field(gt=0)]  # strictly greater than zero
Email = Annotated[str, StringConstraints(pattern=r"^[\w\.-]+@[\w\.-]+\.\w+$")]  # simple shape check, not full RFC validation

class User(BaseModel):
    # All three fields are required; the constraints come from the aliases above.
    username: Username
    age: PositiveInt
    email: Email

Validators

Field Validators

from pydantic import BaseModel, field_validator, ValidationInfo
from typing import Any

class Order(BaseModel):
    # Order payload demonstrating "before" and default ("after") field validators.
    items: list[str]
    total: float

    @field_validator("items")
    @classmethod
    def items_not_empty(cls, v: list[str]) -> list[str]:
        """Reject an order that carries no items."""
        if v:
            return v
        raise ValueError("Order must have at least one item")

    @field_validator("total", mode="before")
    @classmethod
    def round_total(cls, v: Any) -> float:
        """Runs on the raw input: round numeric totals to two decimals."""
        if not isinstance(v, (int, float)):
            # Anything else is handed on untouched to the regular float validation.
            return v
        return round(float(v), 2)

    @field_validator("total")
    @classmethod
    def total_positive(cls, v: float, info: ValidationInfo) -> float:
        """Require a positive total; `info` is accepted but not consulted here."""
        non_positive = v <= 0
        if non_positive:
            raise ValueError("Total must be positive")
        return v

Model Validators

from datetime import date
from typing import Any, Self

from pydantic import BaseModel, model_validator


class DateRange(BaseModel):
    # Date interval; start and end may be the same day.
    start_date: date
    end_date: date

    @model_validator(mode="after")
    def validate_date_range(self) -> Self:
        """Validate after all fields are set."""
        if self.end_date < self.start_date:
            # Equal dates pass the check above, so the message says "not before".
            raise ValueError("end_date must not be before start_date")
        return self

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_format(cls, data: Any) -> Any:
        """Transform data before validation.

        Accepts the legacy payload ``{"date_range": "<start>/<end>"}`` and
        rewrites it to the current two-field shape. Any other input is
        returned unchanged so normal field validation reports the problem.

        Raises:
            ValueError: if the legacy string does not contain exactly one "/".
        """
        if isinstance(data, dict) and isinstance(data.get("date_range"), str):
            parts = data["date_range"].split("/")
            if len(parts) != 2:
                raise ValueError("date_range must have the form 'start/end'")
            start, end = parts
            return {"start_date": start, "end_date": end}
        return data

Computed Fields

from pydantic import BaseModel, computed_field
from datetime import date

class Person(BaseModel):
    # Stored fields; full_name and age are derived and included in serialization.
    first_name: str
    last_name: str
    birth_date: date

    @computed_field
    @property
    def full_name(self) -> str:
        return f"{self.first_name} {self.last_name}"

    @computed_field
    @property
    def age(self) -> int:
        today = date.today()
        born = self.birth_date
        # A plain year difference over-counts by one until this year's
        # birthday has been reached, so compare (month, day) as well.
        birthday_passed = (today.month, today.day) >= (born.month, born.day)
        return today.year - born.year - (0 if birthday_passed else 1)

Settings Management

BaseSettings for Configuration

from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, SecretStr
from functools import lru_cache

class Settings(BaseSettings):
    """Application settings from environment variables."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="APP_",           # APP_DATABASE_URL -> database_url
        env_nested_delimiter="__",   # APP_DB__HOST -> db.host
        case_sensitive=False,
        extra="ignore",
    )

    # Required settings
    database_url: str
    secret_key: SecretStr

    # Settings with defaults
    debug: bool = False
    log_level: str = "INFO"

    # Redis connection: two flat fields, not a nested model
    redis_host: str = "localhost"
    redis_port: int = 6379

    # Derived from the two Redis fields above; a plain property, not a declared field
    @property
    def redis_url(self) -> str:
        """Return the redis:// URL built from redis_host and redis_port."""
        return f"redis://{self.redis_host}:{self.redis_port}"


@lru_cache
def get_settings() -> Settings:
    """Build the settings once; later calls return the memoized instance."""
    settings = Settings()
    return settings

Environment File (.env)

# .env
APP_DATABASE_URL=postgresql://user:pass@localhost/db
APP_SECRET_KEY=super-secret-key
APP_DEBUG=true
APP_LOG_LEVEL=DEBUG

Nested Settings

from pydantic import BaseModel, SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict

class DatabaseSettings(BaseModel):
    # Connection parameters for the database; every field has a local default.
    host: str = "localhost"
    port: int = 5432
    name: str = "app"
    user: str = "postgres"
    password: SecretStr = SecretStr("")

    @property
    def url(self) -> str:
        """Return the postgresql:// URL, with the password unwrapped from SecretStr."""
        return f"postgresql://{self.user}:{self.password.get_secret_value()}@{self.host}:{self.port}/{self.name}"

class CacheSettings(BaseModel):
    # Cache connection parameters plus the entry time-to-live.
    host: str = "localhost"
    port: int = 6379
    ttl: int = 3600

class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        # "__" separates the nested model's name from its field in env var names.
        env_nested_delimiter="__",
    )

    database: DatabaseSettings = DatabaseSettings()
    cache: CacheSettings = CacheSettings()

# Use: DATABASE__HOST=myhost DATABASE__PORT=5433

Serialization

JSON Serialization

from pydantic import BaseModel
from datetime import datetime

class Event(BaseModel):
    # Minimal model used by the serialization calls below.
    name: str
    timestamp: datetime

# Serialize to dict (values keep their Python types)
event = Event(name="Test", timestamp=datetime.now())
event.model_dump()  # {'name': 'Test', 'timestamp': datetime(...)}

# Serialize to JSON string
event.model_dump_json()  # '{"name": "Test", "timestamp": "2024-..."}'

# Control serialization (all options are shown together for reference;
# in practice pick the ones you need)
event.model_dump(
    mode="json",           # JSON-compatible types
    exclude_none=True,     # Skip None values
    exclude_unset=True,    # Skip values not explicitly set
    by_alias=True,         # Use field aliases
    include={"name"},      # Only include specific fields
    exclude={"timestamp"}, # Exclude specific fields
)

Custom Serialization

from pydantic import BaseModel, field_serializer, model_serializer
from datetime import datetime
from enum import Enum

class Status(Enum):
    # Lowercase string values; members are ACTIVE and INACTIVE.
    ACTIVE = "active"
    INACTIVE = "inactive"

class Record(BaseModel):
    # Both fields get a custom serializer below.
    status: Status
    created_at: datetime

    @field_serializer("created_at")
    def serialize_datetime(self, value: datetime) -> str:
        """Render the timestamp as 'YYYY-MM-DD HH:MM' (seconds dropped)."""
        return f"{value:%Y-%m-%d %H:%M}"

    @field_serializer("status")
    def serialize_status(self, value: Status) -> str:
        """Emit the enum's string value in upper case."""
        raw = value.value
        return raw.upper()

Aliases for API Compatibility

from datetime import datetime

from pydantic import BaseModel, ConfigDict, Field

class APIResponse(BaseModel):
    """Model with aliases for API compatibility."""

    # Allow construction by Python field name in addition to the camelCase alias.
    model_config = ConfigDict(populate_by_name=True)

    user_id: int = Field(..., alias="userId")
    first_name: str = Field(..., alias="firstName")
    created_at: datetime = Field(..., alias="createdAt")

# Parse from camelCase API (keys match the field aliases)
data = {"userId": 1, "firstName": "John", "createdAt": "2024-01-01T00:00:00"}
response = APIResponse.model_validate(data)

# Access with Python names
print(response.user_id)  # 1

# Serialize back to camelCase (by_alias=True emits the alias as the key)
response.model_dump(by_alias=True)

Common Patterns

Discriminated Unions

from pydantic import BaseModel, Field
from typing import Literal, Union
from typing import Annotated

class Cat(BaseModel):
    # pet_type is the tag: only the literal "cat" selects this model.
    pet_type: Literal["cat"]
    meows: int

class Dog(BaseModel):
    # pet_type is the tag: only the literal "dog" selects this model.
    pet_type: Literal["dog"]
    barks: float

# Discriminated union with explicit discriminator
Pet = Annotated[Union[Cat, Dog], Field(discriminator="pet_type")]

class Household(BaseModel):
    # Each list element is validated as Cat or Dog according to its pet_type.
    pets: list[Pet]

# Validation correctly identifies type
data = {"pets": [{"pet_type": "cat", "meows": 4}, {"pet_type": "dog", "barks": 3.5}]}
household = Household.model_validate(data)

Generic Models

from typing import Generic, TypeVar

from pydantic import BaseModel, Field, computed_field

T = TypeVar("T")

class PaginatedResponse(BaseModel, Generic[T]):
    # One page of results plus the counters needed to derive the page count.
    items: list[T]
    total: int
    page: int
    # Must be positive: total_pages divides by it, so zero would raise
    # ZeroDivisionError whenever the model is serialized.
    page_size: int = Field(gt=0)

    @computed_field
    @property
    def total_pages(self) -> int:
        # Ceiling division without floats: adding page_size - 1 rounds up.
        return (self.total + self.page_size - 1) // self.page_size

# Use with specific types
class UserResponse(BaseModel):
    # Item type used to parametrize PaginatedResponse below.
    id: int
    name: str

# PaginatedResponse[UserResponse] is now a valid type
def get_users() -> PaginatedResponse[UserResponse]:
    """Placeholder showing the parametrized model as a return annotation."""
    ...

Partial Models (Optional Fields for Updates)

from pydantic import BaseModel, create_model
from typing import Optional

class UserCreate(BaseModel):
    # Creation payload: all three fields are required.
    username: str
    email: str
    password: str

# Create partial model for updates
def make_partial(model: type[BaseModel]) -> type[BaseModel]:
    """Derive a ``Partial<Name>`` model whose every field is optional (for PATCH).

    Only each field's annotation is carried over; it is wrapped in Optional
    and given a default of None.
    """
    optional_fields = {}
    for field_name, field_info in model.model_fields.items():
        # create_model takes each field as an (annotation, default) tuple.
        optional_fields[field_name] = (Optional[field_info.annotation], None)
    partial_name = f"Partial{model.__name__}"
    return create_model(partial_name, **optional_fields)

UserUpdate = make_partial(UserCreate)
# Now UserUpdate has all optional fields (each defaults to None)

Factory Pattern

from pydantic import BaseModel
from typing import ClassVar

class NotificationBase(BaseModel):
    """Base notification with factory method."""

    message: str
    recipient: str

    # Registry for subclasses, keyed by the notification_type class keyword.
    # Declared as ClassVar so it is shared class state, not a model field.
    _registry: ClassVar[dict[str, type["NotificationBase"]]] = {}

    def __init_subclass__(cls, notification_type: str | None = None, **kwargs) -> None:
        """Register the subclass when it is declared with notification_type=...

        Subclasses declared without the keyword (or with an empty string)
        are not added to the registry.
        """
        super().__init_subclass__(**kwargs)
        if notification_type:
            cls._registry[notification_type] = cls

    @classmethod
    def create(cls, notification_type: str, **data) -> "NotificationBase":
        """Factory method to create appropriate notification type.

        Args:
            notification_type: Key under which the target subclass registered.
            **data: Field values forwarded to that subclass's constructor.

        Raises:
            ValueError: if no subclass is registered under notification_type.
        """
        if notification_type not in cls._registry:
            raise ValueError(f"Unknown notification type: {notification_type}")
        return cls._registry[notification_type](**data)

# The notification_type class keyword registers each subclass with the base.
class EmailNotification(NotificationBase, notification_type="email"):
    subject: str

class SMSNotification(NotificationBase, notification_type="sms"):
    phone_number: str

# Usage: "email" selects EmailNotification; the keywords become its fields
notification = NotificationBase.create(
    "email",
    message="Hello",
    recipient="user@example.com",
    subject="Welcome"
)

Testing Pydantic Models

import pytest
from pydantic import ValidationError

def test_valid_model():
    """A fully specified User (the model with a required `id`) validates."""
    # `id` has no default on that model, so it must be supplied.
    user = User(id=1, username="john", email="john@example.com")
    assert user.username == "john"

def test_invalid_email():
    """A malformed email is the only validation failure reported."""
    # `id` is supplied so a missing-field error does not add a second entry.
    with pytest.raises(ValidationError) as exc_info:
        User(id=1, username="john", email="invalid")

    errors = exc_info.value.errors()
    assert len(errors) == 1
    assert errors[0]["loc"] == ("email",)
    assert errors[0]["type"] == "string_pattern_mismatch"

def test_model_serialization():
    """model_dump() returns the declared fields and nothing else."""
    # `id` is required on the User model with an `id` field.
    user = User(id=1, username="john", email="john@example.com")
    data = user.model_dump()

    assert data["id"] == 1
    assert data["username"] == "john"
    # That User model declares no password field, so none can leak into the dump.
    assert "password" not in data

def test_model_from_json():
    """model_validate_json() parses and validates a raw JSON string."""
    # The payload includes the required `id`; without it validation fails.
    json_data = '{"id": 1, "username": "john", "email": "john@example.com"}'
    user = User.model_validate_json(json_data)

    assert user.id == 1
    assert user.username == "john"

Guidelines

  • Use Pydantic v2 syntax (model_validate, model_dump, ConfigDict)
  • Prefer Annotated types for reusable constraints
  • Use Field(...) for required fields with constraints
  • Implement field_validator for single-field validation
  • Implement model_validator for cross-field validation
  • Use computed_field for derived properties
  • Use BaseSettings for configuration management
  • Always handle ValidationError in API endpoints
  • Use SecretStr for sensitive data
  • Set extra="forbid" to catch typos in input data
Weekly Installs
1
First Seen
12 days ago
Installed on
mcpjam: 1
claude-code: 1
junie: 1
windsurf: 1
zencoder: 1
crush: 1